| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.08977063602495623, |
| "eval_steps": 500, |
| "global_step": 500, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0008977063602495623, |
| "grad_norm": 49.75, |
| "learning_rate": 1.8797356064157479e-06, |
| "logits/chosen": -1.5944416522979736, |
| "logits/rejected": -1.6120755672454834, |
| "logps/chosen": -218.08145141601562, |
| "logps/rejected": -238.650634765625, |
| "loss": 0.69, |
| "rewards/accuracies": 0.375, |
| "rewards/chosen": 0.003223979379981756, |
| "rewards/margins": 0.006512450985610485, |
| "rewards/rejected": -0.003288471605628729, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.0017954127204991247, |
| "grad_norm": 52.75, |
| "learning_rate": 4.229405114435433e-06, |
| "logits/chosen": -1.638082504272461, |
| "logits/rejected": -1.644774079322815, |
| "logps/chosen": -218.1611785888672, |
| "logps/rejected": -238.66098022460938, |
| "loss": 0.6066, |
| "rewards/accuracies": 0.9375, |
| "rewards/chosen": 0.07871033251285553, |
| "rewards/margins": 0.1871051788330078, |
| "rewards/rejected": -0.10839483886957169, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.002693119080748687, |
| "grad_norm": 28.625, |
| "learning_rate": 6.579074622455118e-06, |
| "logits/chosen": -1.7269313335418701, |
| "logits/rejected": -1.7303335666656494, |
| "logps/chosen": -226.7982635498047, |
| "logps/rejected": -258.6452331542969, |
| "loss": 0.3006, |
| "rewards/accuracies": 0.965624988079071, |
| "rewards/chosen": -0.6204186081886292, |
| "rewards/margins": 1.243154764175415, |
| "rewards/rejected": -1.8635733127593994, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.0035908254409982494, |
| "grad_norm": 7.84375, |
| "learning_rate": 8.928744130474802e-06, |
| "logits/chosen": -1.7967208623886108, |
| "logits/rejected": -1.814859390258789, |
| "logps/chosen": -229.9993133544922, |
| "logps/rejected": -288.5595397949219, |
| "loss": 0.0873, |
| "rewards/accuracies": 0.9750000238418579, |
| "rewards/chosen": -1.8708345890045166, |
| "rewards/margins": 3.8205904960632324, |
| "rewards/rejected": -5.691425323486328, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.004488531801247812, |
| "grad_norm": 14.875, |
| "learning_rate": 1.1278413638494489e-05, |
| "logits/chosen": -1.7308677434921265, |
| "logits/rejected": -1.7561269998550415, |
| "logps/chosen": -243.46029663085938, |
| "logps/rejected": -340.27764892578125, |
| "loss": 0.018, |
| "rewards/accuracies": 0.984375, |
| "rewards/chosen": -3.0787580013275146, |
| "rewards/margins": 7.586331367492676, |
| "rewards/rejected": -10.665090560913086, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.005386238161497374, |
| "grad_norm": 8.25, |
| "learning_rate": 1.3628083146514173e-05, |
| "logits/chosen": -1.6984357833862305, |
| "logits/rejected": -1.7242708206176758, |
| "logps/chosen": -264.53125, |
| "logps/rejected": -376.6803894042969, |
| "loss": 0.0285, |
| "rewards/accuracies": 0.981249988079071, |
| "rewards/chosen": -4.747864246368408, |
| "rewards/margins": 9.422819137573242, |
| "rewards/rejected": -14.170684814453125, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.006283944521746937, |
| "grad_norm": 4.1875, |
| "learning_rate": 1.5977752654533858e-05, |
| "logits/chosen": -1.6455342769622803, |
| "logits/rejected": -1.6728187799453735, |
| "logps/chosen": -281.365966796875, |
| "logps/rejected": -407.96337890625, |
| "loss": 0.0216, |
| "rewards/accuracies": 0.984375, |
| "rewards/chosen": -6.47125768661499, |
| "rewards/margins": 10.662993431091309, |
| "rewards/rejected": -17.13425064086914, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.007181650881996499, |
| "grad_norm": 6.3125, |
| "learning_rate": 1.6447684804072058e-05, |
| "logits/chosen": -1.5919939279556274, |
| "logits/rejected": -1.617920160293579, |
| "logps/chosen": -278.3464660644531, |
| "logps/rejected": -404.8271484375, |
| "loss": 0.0342, |
| "rewards/accuracies": 0.9781249761581421, |
| "rewards/chosen": -6.385420799255371, |
| "rewards/margins": 10.654914855957031, |
| "rewards/rejected": -17.040334701538086, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.00807935724224606, |
| "grad_norm": 6.625, |
| "learning_rate": 1.6447677686306693e-05, |
| "logits/chosen": -1.6035076379776, |
| "logits/rejected": -1.6163572072982788, |
| "logps/chosen": -279.767822265625, |
| "logps/rejected": -383.13458251953125, |
| "loss": 0.0432, |
| "rewards/accuracies": 0.9593750238418579, |
| "rewards/chosen": -5.704493999481201, |
| "rewards/margins": 8.454301834106445, |
| "rewards/rejected": -14.158796310424805, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.008977063602495623, |
| "grad_norm": 4.8125, |
| "learning_rate": 1.6447665093343918e-05, |
| "logits/chosen": -1.6678664684295654, |
| "logits/rejected": -1.6700479984283447, |
| "logps/chosen": -264.4732971191406, |
| "logps/rejected": -363.1940612792969, |
| "loss": 0.0464, |
| "rewards/accuracies": 0.971875011920929, |
| "rewards/chosen": -5.7117018699646, |
| "rewards/margins": 7.980559349060059, |
| "rewards/rejected": -13.692262649536133, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.009874769962745186, |
| "grad_norm": 4.65625, |
| "learning_rate": 1.6447647025194904e-05, |
| "logits/chosen": -1.5799241065979004, |
| "logits/rejected": -1.5821675062179565, |
| "logps/chosen": -268.1691589355469, |
| "logps/rejected": -374.00518798828125, |
| "loss": 0.0205, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": -5.694095134735107, |
| "rewards/margins": 8.601041793823242, |
| "rewards/rejected": -14.295137405395508, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.010772476322994749, |
| "grad_norm": 4.625, |
| "learning_rate": 1.6447623481875693e-05, |
| "logits/chosen": -1.615523338317871, |
| "logits/rejected": -1.6053167581558228, |
| "logps/chosen": -269.4774475097656, |
| "logps/rejected": -389.2843933105469, |
| "loss": 0.0186, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": -5.536202907562256, |
| "rewards/margins": 9.901906967163086, |
| "rewards/rejected": -15.4381103515625, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.011670182683244311, |
| "grad_norm": 5.0625, |
| "learning_rate": 1.644759446340718e-05, |
| "logits/chosen": -1.62222158908844, |
| "logits/rejected": -1.6126108169555664, |
| "logps/chosen": -273.30316162109375, |
| "logps/rejected": -401.03851318359375, |
| "loss": 0.0237, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": -5.82913875579834, |
| "rewards/margins": 10.689005851745605, |
| "rewards/rejected": -16.518144607543945, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.012567889043493874, |
| "grad_norm": 5.0625, |
| "learning_rate": 1.644755996981513e-05, |
| "logits/chosen": -1.6640081405639648, |
| "logits/rejected": -1.6507833003997803, |
| "logps/chosen": -274.74053955078125, |
| "logps/rejected": -404.7392883300781, |
| "loss": 0.0319, |
| "rewards/accuracies": 0.971875011920929, |
| "rewards/chosen": -5.577990531921387, |
| "rewards/margins": 10.969307899475098, |
| "rewards/rejected": -16.547298431396484, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.013465595403743435, |
| "grad_norm": 30.25, |
| "learning_rate": 1.6447520001130158e-05, |
| "logits/chosen": -1.5772068500518799, |
| "logits/rejected": -1.5707197189331055, |
| "logps/chosen": -278.74658203125, |
| "logps/rejected": -406.4679260253906, |
| "loss": 0.0555, |
| "rewards/accuracies": 0.971875011920929, |
| "rewards/chosen": -6.121307849884033, |
| "rewards/margins": 10.843521118164062, |
| "rewards/rejected": -16.96483039855957, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.014363301763992998, |
| "grad_norm": 3.109375, |
| "learning_rate": 1.6447474557387748e-05, |
| "logits/chosen": -1.53738534450531, |
| "logits/rejected": -1.534790277481079, |
| "logps/chosen": -314.1097717285156, |
| "logps/rejected": -420.47308349609375, |
| "loss": 0.0185, |
| "rewards/accuracies": 0.9906250238418579, |
| "rewards/chosen": -9.349452018737793, |
| "rewards/margins": 8.684282302856445, |
| "rewards/rejected": -18.033733367919922, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.01526100812424256, |
| "grad_norm": 4.03125, |
| "learning_rate": 1.6447423638628237e-05, |
| "logits/chosen": -1.5148117542266846, |
| "logits/rejected": -1.5227998495101929, |
| "logps/chosen": -316.7289123535156, |
| "logps/rejected": -430.1024475097656, |
| "loss": 0.0115, |
| "rewards/accuracies": 0.9937499761581421, |
| "rewards/chosen": -10.431829452514648, |
| "rewards/margins": 9.403815269470215, |
| "rewards/rejected": -19.835643768310547, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.01615871448449212, |
| "grad_norm": 3.84375, |
| "learning_rate": 1.6447367244896826e-05, |
| "logits/chosen": -1.606527328491211, |
| "logits/rejected": -1.6100928783416748, |
| "logps/chosen": -319.0159606933594, |
| "logps/rejected": -440.75738525390625, |
| "loss": 0.0257, |
| "rewards/accuracies": 0.984375, |
| "rewards/chosen": -10.429452896118164, |
| "rewards/margins": 10.196511268615723, |
| "rewards/rejected": -20.625965118408203, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.017056420844741686, |
| "grad_norm": 24.125, |
| "learning_rate": 1.644730537624358e-05, |
| "logits/chosen": -1.6584317684173584, |
| "logits/rejected": -1.6650241613388062, |
| "logps/chosen": -315.8913269042969, |
| "logps/rejected": -439.966552734375, |
| "loss": 0.0371, |
| "rewards/accuracies": 0.984375, |
| "rewards/chosen": -10.410109519958496, |
| "rewards/margins": 10.561999320983887, |
| "rewards/rejected": -20.972110748291016, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.017954127204991246, |
| "grad_norm": 13.375, |
| "learning_rate": 1.644723803272341e-05, |
| "logits/chosen": -1.6700522899627686, |
| "logits/rejected": -1.6734033823013306, |
| "logps/chosen": -319.60516357421875, |
| "logps/rejected": -441.1056213378906, |
| "loss": 0.0222, |
| "rewards/accuracies": 0.981249988079071, |
| "rewards/chosen": -10.69153881072998, |
| "rewards/margins": 10.231417655944824, |
| "rewards/rejected": -20.922958374023438, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.01885183356524081, |
| "grad_norm": 6.3125, |
| "learning_rate": 1.644716521439611e-05, |
| "logits/chosen": -1.7060085535049438, |
| "logits/rejected": -1.7089792490005493, |
| "logps/chosen": -347.458984375, |
| "logps/rejected": -468.8707580566406, |
| "loss": 0.0224, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": -12.833531379699707, |
| "rewards/margins": 10.041297912597656, |
| "rewards/rejected": -22.874828338623047, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.019749539925490372, |
| "grad_norm": 10.8125, |
| "learning_rate": 1.644708692132631e-05, |
| "logits/chosen": -1.7060142755508423, |
| "logits/rejected": -1.7084630727767944, |
| "logps/chosen": -356.79656982421875, |
| "logps/rejected": -479.97235107421875, |
| "loss": 0.0264, |
| "rewards/accuracies": 0.9906250238418579, |
| "rewards/chosen": -13.447436332702637, |
| "rewards/margins": 10.534585952758789, |
| "rewards/rejected": -23.982025146484375, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.020647246285739933, |
| "grad_norm": 6.15625, |
| "learning_rate": 1.6447003153583514e-05, |
| "logits/chosen": -1.642289161682129, |
| "logits/rejected": -1.6480754613876343, |
| "logps/chosen": -356.51251220703125, |
| "logps/rejected": -481.41827392578125, |
| "loss": 0.0122, |
| "rewards/accuracies": 0.9906250238418579, |
| "rewards/chosen": -13.347066879272461, |
| "rewards/margins": 10.536771774291992, |
| "rewards/rejected": -23.883838653564453, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.021544952645989497, |
| "grad_norm": 8.3125, |
| "learning_rate": 1.644691391124208e-05, |
| "logits/chosen": -1.6251140832901, |
| "logits/rejected": -1.6293065547943115, |
| "logps/chosen": -362.5952453613281, |
| "logps/rejected": -487.474853515625, |
| "loss": 0.0638, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": -14.098132133483887, |
| "rewards/margins": 10.43175983428955, |
| "rewards/rejected": -24.529891967773438, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.022442659006239058, |
| "grad_norm": 0.9375, |
| "learning_rate": 1.6446819194381232e-05, |
| "logits/chosen": -1.6321861743927002, |
| "logits/rejected": -1.6401519775390625, |
| "logps/chosen": -365.3531188964844, |
| "logps/rejected": -472.98590087890625, |
| "loss": 0.0341, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": -14.91484546661377, |
| "rewards/margins": 8.75818920135498, |
| "rewards/rejected": -23.673038482666016, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.023340365366488623, |
| "grad_norm": 16.125, |
| "learning_rate": 1.6446719003085048e-05, |
| "logits/chosen": -1.682080864906311, |
| "logits/rejected": -1.6901333332061768, |
| "logps/chosen": -375.67047119140625, |
| "logps/rejected": -475.9076232910156, |
| "loss": 0.0263, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": -14.836702346801758, |
| "rewards/margins": 8.173124313354492, |
| "rewards/rejected": -23.00982666015625, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.024238071726738183, |
| "grad_norm": 5.09375, |
| "learning_rate": 1.6446613337442464e-05, |
| "logits/chosen": -1.7631546258926392, |
| "logits/rejected": -1.75924813747406, |
| "logps/chosen": -334.9482421875, |
| "logps/rejected": -438.7393493652344, |
| "loss": 0.0274, |
| "rewards/accuracies": 0.981249988079071, |
| "rewards/chosen": -12.050386428833008, |
| "rewards/margins": 8.398730278015137, |
| "rewards/rejected": -20.44911766052246, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.025135778086987748, |
| "grad_norm": 16.125, |
| "learning_rate": 1.6446502197547285e-05, |
| "logits/chosen": -1.6700756549835205, |
| "logits/rejected": -1.6583993434906006, |
| "logps/chosen": -327.59222412109375, |
| "logps/rejected": -443.2696228027344, |
| "loss": 0.0349, |
| "rewards/accuracies": 0.981249988079071, |
| "rewards/chosen": -10.80932903289795, |
| "rewards/margins": 9.692026138305664, |
| "rewards/rejected": -20.501354217529297, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.02603348444723731, |
| "grad_norm": 6.5625, |
| "learning_rate": 1.6446385583498166e-05, |
| "logits/chosen": -1.603623628616333, |
| "logits/rejected": -1.5888742208480835, |
| "logps/chosen": -325.58660888671875, |
| "logps/rejected": -446.7493591308594, |
| "loss": 0.0473, |
| "rewards/accuracies": 0.96875, |
| "rewards/chosen": -11.347869873046875, |
| "rewards/margins": 10.037898063659668, |
| "rewards/rejected": -21.38576889038086, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.02693119080748687, |
| "grad_norm": 5.21875, |
| "learning_rate": 1.6446263495398625e-05, |
| "logits/chosen": -1.6120811700820923, |
| "logits/rejected": -1.5870082378387451, |
| "logps/chosen": -317.60162353515625, |
| "logps/rejected": -438.42633056640625, |
| "loss": 0.0193, |
| "rewards/accuracies": 0.984375, |
| "rewards/chosen": -9.432976722717285, |
| "rewards/margins": 10.237409591674805, |
| "rewards/rejected": -19.670385360717773, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.027828897167736434, |
| "grad_norm": 3.4375, |
| "learning_rate": 1.644613593335704e-05, |
| "logits/chosen": -1.5875444412231445, |
| "logits/rejected": -1.5749518871307373, |
| "logps/chosen": -303.3853454589844, |
| "logps/rejected": -432.47808837890625, |
| "loss": 0.0154, |
| "rewards/accuracies": 0.9937499761581421, |
| "rewards/chosen": -8.602149963378906, |
| "rewards/margins": 10.840978622436523, |
| "rewards/rejected": -19.443126678466797, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.028726603527985995, |
| "grad_norm": 3.9375, |
| "learning_rate": 1.6446002897486648e-05, |
| "logits/chosen": -1.618011236190796, |
| "logits/rejected": -1.6145331859588623, |
| "logps/chosen": -312.6946105957031, |
| "logps/rejected": -446.9215393066406, |
| "loss": 0.0265, |
| "rewards/accuracies": 0.981249988079071, |
| "rewards/chosen": -8.892807960510254, |
| "rewards/margins": 11.515321731567383, |
| "rewards/rejected": -20.408130645751953, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.02962430988823556, |
| "grad_norm": 5.96875, |
| "learning_rate": 1.644586438790554e-05, |
| "logits/chosen": -1.5836814641952515, |
| "logits/rejected": -1.587181806564331, |
| "logps/chosen": -306.9125061035156, |
| "logps/rejected": -445.1659240722656, |
| "loss": 0.0256, |
| "rewards/accuracies": 0.981249988079071, |
| "rewards/chosen": -9.431074142456055, |
| "rewards/margins": 11.796531677246094, |
| "rewards/rejected": -21.22760581970215, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.03052201624848512, |
| "grad_norm": 11.125, |
| "learning_rate": 1.6445720404736678e-05, |
| "logits/chosen": -1.6508190631866455, |
| "logits/rejected": -1.65244460105896, |
| "logps/chosen": -310.2176208496094, |
| "logps/rejected": -440.75714111328125, |
| "loss": 0.0288, |
| "rewards/accuracies": 0.981249988079071, |
| "rewards/chosen": -9.615917205810547, |
| "rewards/margins": 11.127912521362305, |
| "rewards/rejected": -20.743831634521484, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.031419722608734685, |
| "grad_norm": 7.0, |
| "learning_rate": 1.644557094810787e-05, |
| "logits/chosen": -1.7216987609863281, |
| "logits/rejected": -1.7154676914215088, |
| "logps/chosen": -340.46466064453125, |
| "logps/rejected": -450.27294921875, |
| "loss": 0.0397, |
| "rewards/accuracies": 0.9906250238418579, |
| "rewards/chosen": -11.357550621032715, |
| "rewards/margins": 8.966830253601074, |
| "rewards/rejected": -20.32438087463379, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.03231742896898424, |
| "grad_norm": 24.0, |
| "learning_rate": 1.6445416018151788e-05, |
| "logits/chosen": -1.7959930896759033, |
| "logits/rejected": -1.800244927406311, |
| "logps/chosen": -331.75506591796875, |
| "logps/rejected": -427.2474670410156, |
| "loss": 0.0207, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": -12.001955032348633, |
| "rewards/margins": 7.540495872497559, |
| "rewards/rejected": -19.542451858520508, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.03321513532923381, |
| "grad_norm": 43.75, |
| "learning_rate": 1.644525561500596e-05, |
| "logits/chosen": -1.9910930395126343, |
| "logits/rejected": -1.9894500970840454, |
| "logps/chosen": -350.335693359375, |
| "logps/rejected": -449.16796875, |
| "loss": 0.0458, |
| "rewards/accuracies": 0.96875, |
| "rewards/chosen": -13.615007400512695, |
| "rewards/margins": 8.14229679107666, |
| "rewards/rejected": -21.757305145263672, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.03411284168948337, |
| "grad_norm": 13.3125, |
| "learning_rate": 1.6445089738812785e-05, |
| "logits/chosen": -1.9771511554718018, |
| "logits/rejected": -1.9749290943145752, |
| "logps/chosen": -348.55828857421875, |
| "logps/rejected": -453.4300842285156, |
| "loss": 0.0432, |
| "rewards/accuracies": 0.9750000238418579, |
| "rewards/chosen": -13.186309814453125, |
| "rewards/margins": 8.536267280578613, |
| "rewards/rejected": -21.722576141357422, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.035010548049732935, |
| "grad_norm": 0.146484375, |
| "learning_rate": 1.6444918389719505e-05, |
| "logits/chosen": -1.9536895751953125, |
| "logits/rejected": -1.9450359344482422, |
| "logps/chosen": -335.36553955078125, |
| "logps/rejected": -448.2103576660156, |
| "loss": 0.0296, |
| "rewards/accuracies": 0.984375, |
| "rewards/chosen": -11.7564697265625, |
| "rewards/margins": 9.423359870910645, |
| "rewards/rejected": -21.179828643798828, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.03590825440998249, |
| "grad_norm": 20.25, |
| "learning_rate": 1.644474156787822e-05, |
| "logits/chosen": -1.878861665725708, |
| "logits/rejected": -1.8584403991699219, |
| "logps/chosen": -319.38067626953125, |
| "logps/rejected": -443.59033203125, |
| "loss": 0.0307, |
| "rewards/accuracies": 0.981249988079071, |
| "rewards/chosen": -10.937708854675293, |
| "rewards/margins": 10.651717185974121, |
| "rewards/rejected": -21.589426040649414, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.03680596077023206, |
| "grad_norm": 17.875, |
| "learning_rate": 1.6444559273445908e-05, |
| "logits/chosen": -1.6908838748931885, |
| "logits/rejected": -1.6792293787002563, |
| "logps/chosen": -321.28009033203125, |
| "logps/rejected": -460.1475524902344, |
| "loss": 0.022, |
| "rewards/accuracies": 0.9906250238418579, |
| "rewards/chosen": -10.206222534179688, |
| "rewards/margins": 11.932024955749512, |
| "rewards/rejected": -22.138248443603516, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.03770366713048162, |
| "grad_norm": 7.0, |
| "learning_rate": 1.6444371506584377e-05, |
| "logits/chosen": -1.6957308053970337, |
| "logits/rejected": -1.690157175064087, |
| "logps/chosen": -290.0798034667969, |
| "logps/rejected": -412.7769470214844, |
| "loss": 0.018, |
| "rewards/accuracies": 0.984375, |
| "rewards/chosen": -7.47702693939209, |
| "rewards/margins": 10.390680313110352, |
| "rewards/rejected": -17.86771011352539, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.03860137349073118, |
| "grad_norm": 8.5625, |
| "learning_rate": 1.644417826746031e-05, |
| "logits/chosen": -1.650665521621704, |
| "logits/rejected": -1.6541109085083008, |
| "logps/chosen": -290.309326171875, |
| "logps/rejected": -417.1390686035156, |
| "loss": 0.0184, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": -7.279687404632568, |
| "rewards/margins": 10.711103439331055, |
| "rewards/rejected": -17.990793228149414, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.039499079850980744, |
| "grad_norm": 8.4375, |
| "learning_rate": 1.6443979556245252e-05, |
| "logits/chosen": -1.6047160625457764, |
| "logits/rejected": -1.6234970092773438, |
| "logps/chosen": -322.4940185546875, |
| "logps/rejected": -458.4981384277344, |
| "loss": 0.0263, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": -9.725212097167969, |
| "rewards/margins": 11.667040824890137, |
| "rewards/rejected": -21.39225196838379, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.04039678621123031, |
| "grad_norm": 23.5, |
| "learning_rate": 1.6443775373115592e-05, |
| "logits/chosen": -1.5689036846160889, |
| "logits/rejected": -1.5908584594726562, |
| "logps/chosen": -343.6766052246094, |
| "logps/rejected": -472.40948486328125, |
| "loss": 0.0679, |
| "rewards/accuracies": 0.9781249761581421, |
| "rewards/chosen": -11.692893028259277, |
| "rewards/margins": 10.971635818481445, |
| "rewards/rejected": -22.66452980041504, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.041294492571479866, |
| "grad_norm": 8.6875, |
| "learning_rate": 1.6443565718252586e-05, |
| "logits/chosen": -1.5273631811141968, |
| "logits/rejected": -1.5362848043441772, |
| "logps/chosen": -333.0375061035156, |
| "logps/rejected": -450.404541015625, |
| "loss": 0.0259, |
| "rewards/accuracies": 0.984375, |
| "rewards/chosen": -11.445978164672852, |
| "rewards/margins": 9.806886672973633, |
| "rewards/rejected": -21.25286293029785, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.04219219893172943, |
| "grad_norm": 9.25, |
| "learning_rate": 1.644335059184234e-05, |
| "logits/chosen": -1.4887597560882568, |
| "logits/rejected": -1.5063436031341553, |
| "logps/chosen": -334.2455139160156, |
| "logps/rejected": -466.03326416015625, |
| "loss": 0.0239, |
| "rewards/accuracies": 0.984375, |
| "rewards/chosen": -12.205533981323242, |
| "rewards/margins": 10.970166206359863, |
| "rewards/rejected": -23.175701141357422, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.043089905291978994, |
| "grad_norm": 5.125, |
| "learning_rate": 1.644312999407582e-05, |
| "logits/chosen": -1.4916335344314575, |
| "logits/rejected": -1.5103265047073364, |
| "logps/chosen": -339.73175048828125, |
| "logps/rejected": -466.05645751953125, |
| "loss": 0.0523, |
| "rewards/accuracies": 0.981249988079071, |
| "rewards/chosen": -12.136640548706055, |
| "rewards/margins": 10.656683921813965, |
| "rewards/rejected": -22.793325424194336, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.04398761165222856, |
| "grad_norm": 6.1875, |
| "learning_rate": 1.644290392514886e-05, |
| "logits/chosen": -1.4491441249847412, |
| "logits/rejected": -1.4810426235198975, |
| "logps/chosen": -330.1545104980469, |
| "logps/rejected": -447.7718811035156, |
| "loss": 0.0283, |
| "rewards/accuracies": 0.984375, |
| "rewards/chosen": -11.471087455749512, |
| "rewards/margins": 9.670295715332031, |
| "rewards/rejected": -21.14138412475586, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.044885318012478116, |
| "grad_norm": 9.75, |
| "learning_rate": 1.6442672385262126e-05, |
| "logits/chosen": -1.3768192529678345, |
| "logits/rejected": -1.4130717515945435, |
| "logps/chosen": -315.2447204589844, |
| "logps/rejected": -440.4739685058594, |
| "loss": 0.0267, |
| "rewards/accuracies": 0.981249988079071, |
| "rewards/chosen": -9.256295204162598, |
| "rewards/margins": 10.560578346252441, |
| "rewards/rejected": -19.81687355041504, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.04578302437272768, |
| "grad_norm": 1.8125, |
| "learning_rate": 1.6442435374621164e-05, |
| "logits/chosen": -1.3219325542449951, |
| "logits/rejected": -1.3581187725067139, |
| "logps/chosen": -295.7016906738281, |
| "logps/rejected": -422.5862731933594, |
| "loss": 0.0325, |
| "rewards/accuracies": 0.984375, |
| "rewards/chosen": -8.51601791381836, |
| "rewards/margins": 10.66891098022461, |
| "rewards/rejected": -19.18492889404297, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.046680730732977245, |
| "grad_norm": 1.265625, |
| "learning_rate": 1.6442192893436368e-05, |
| "logits/chosen": -1.2778997421264648, |
| "logits/rejected": -1.312280535697937, |
| "logps/chosen": -303.11151123046875, |
| "logps/rejected": -420.34912109375, |
| "loss": 0.024, |
| "rewards/accuracies": 0.984375, |
| "rewards/chosen": -8.392851829528809, |
| "rewards/margins": 9.67725658416748, |
| "rewards/rejected": -18.070110321044922, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.0475784370932268, |
| "grad_norm": 2.765625, |
| "learning_rate": 1.644194494192298e-05, |
| "logits/chosen": -1.2928860187530518, |
| "logits/rejected": -1.3260154724121094, |
| "logps/chosen": -286.2437438964844, |
| "logps/rejected": -393.56231689453125, |
| "loss": 0.0281, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": -7.124878883361816, |
| "rewards/margins": 8.837203979492188, |
| "rewards/rejected": -15.962081909179688, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.04847614345347637, |
| "grad_norm": 16.125, |
| "learning_rate": 1.6441691520301115e-05, |
| "logits/chosen": -1.278626799583435, |
| "logits/rejected": -1.3031818866729736, |
| "logps/chosen": -310.00946044921875, |
| "logps/rejected": -427.1468200683594, |
| "loss": 0.0328, |
| "rewards/accuracies": 0.981249988079071, |
| "rewards/chosen": -9.098076820373535, |
| "rewards/margins": 9.82015609741211, |
| "rewards/rejected": -18.918231964111328, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.04937384981372593, |
| "grad_norm": 2.625, |
| "learning_rate": 1.644143262879573e-05, |
| "logits/chosen": -1.355022668838501, |
| "logits/rejected": -1.377715826034546, |
| "logps/chosen": -317.8216247558594, |
| "logps/rejected": -431.4881896972656, |
| "loss": 0.0385, |
| "rewards/accuracies": 0.9781249761581421, |
| "rewards/chosen": -9.668425559997559, |
| "rewards/margins": 9.433794975280762, |
| "rewards/rejected": -19.102222442626953, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.050271556173975496, |
| "grad_norm": 9.75, |
| "learning_rate": 1.644116826763664e-05, |
| "logits/chosen": -1.3636281490325928, |
| "logits/rejected": -1.384377360343933, |
| "logps/chosen": -302.4226379394531, |
| "logps/rejected": -413.4556579589844, |
| "loss": 0.0475, |
| "rewards/accuracies": 0.971875011920929, |
| "rewards/chosen": -9.204570770263672, |
| "rewards/margins": 9.083320617675781, |
| "rewards/rejected": -18.287891387939453, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.05116926253422505, |
| "grad_norm": 8.125, |
| "learning_rate": 1.6440898437058523e-05, |
| "logits/chosen": -1.340986728668213, |
| "logits/rejected": -1.3553143739700317, |
| "logps/chosen": -313.5350341796875, |
| "logps/rejected": -417.8633728027344, |
| "loss": 0.0624, |
| "rewards/accuracies": 0.9624999761581421, |
| "rewards/chosen": -8.931371688842773, |
| "rewards/margins": 8.754600524902344, |
| "rewards/rejected": -17.68597412109375, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.05206696889447462, |
| "grad_norm": 1.4609375, |
| "learning_rate": 1.64406231373009e-05, |
| "logits/chosen": -1.3220717906951904, |
| "logits/rejected": -1.3361364603042603, |
| "logps/chosen": -301.2831726074219, |
| "logps/rejected": -406.24481201171875, |
| "loss": 0.033, |
| "rewards/accuracies": 0.981249988079071, |
| "rewards/chosen": -9.154109001159668, |
| "rewards/margins": 8.63221263885498, |
| "rewards/rejected": -17.78632164001465, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.05296467525472418, |
| "grad_norm": 5.15625, |
| "learning_rate": 1.6440342368608156e-05, |
| "logits/chosen": -1.2657798528671265, |
| "logits/rejected": -1.2791422605514526, |
| "logps/chosen": -326.9562072753906, |
| "logps/rejected": -439.09356689453125, |
| "loss": 0.061, |
| "rewards/accuracies": 0.965624988079071, |
| "rewards/chosen": -11.56185531616211, |
| "rewards/margins": 9.128196716308594, |
| "rewards/rejected": -20.690053939819336, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.05386238161497374, |
| "grad_norm": 0.0198974609375, |
| "learning_rate": 1.6440056131229532e-05, |
| "logits/chosen": -1.2754865884780884, |
| "logits/rejected": -1.2849574089050293, |
| "logps/chosen": -345.1870422363281, |
| "logps/rejected": -475.9234924316406, |
| "loss": 0.0144, |
| "rewards/accuracies": 0.9906250238418579, |
| "rewards/chosen": -11.819540977478027, |
| "rewards/margins": 11.19267463684082, |
| "rewards/rejected": -23.012216567993164, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.054760087975223304, |
| "grad_norm": 1.828125, |
| "learning_rate": 1.6439764425419112e-05, |
| "logits/chosen": -1.274107813835144, |
| "logits/rejected": -1.2885282039642334, |
| "logps/chosen": -330.4737854003906, |
| "logps/rejected": -473.398681640625, |
| "loss": 0.0267, |
| "rewards/accuracies": 0.984375, |
| "rewards/chosen": -11.204086303710938, |
| "rewards/margins": 12.114290237426758, |
| "rewards/rejected": -23.318378448486328, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.05565779433547287, |
| "grad_norm": 13.9375, |
| "learning_rate": 1.6439467251435852e-05, |
| "logits/chosen": -1.2366708517074585, |
| "logits/rejected": -1.2527769804000854, |
| "logps/chosen": -323.2499694824219, |
| "logps/rejected": -467.12890625, |
| "loss": 0.023, |
| "rewards/accuracies": 0.9906250238418579, |
| "rewards/chosen": -11.250632286071777, |
| "rewards/margins": 12.270976066589355, |
| "rewards/rejected": -23.521610260009766, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.05655550069572243, |
| "grad_norm": 2.21875, |
| "learning_rate": 1.6439164609543545e-05, |
| "logits/chosen": -1.287007212638855, |
| "logits/rejected": -1.315598726272583, |
| "logps/chosen": -315.24200439453125, |
| "logps/rejected": -456.1424255371094, |
| "loss": 0.0308, |
| "rewards/accuracies": 0.981249988079071, |
| "rewards/chosen": -10.730131149291992, |
| "rewards/margins": 11.966339111328125, |
| "rewards/rejected": -22.696468353271484, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.05745320705597199, |
| "grad_norm": 4.03125, |
| "learning_rate": 1.6438856500010842e-05, |
| "logits/chosen": -1.412188172340393, |
| "logits/rejected": -1.4369876384735107, |
| "logps/chosen": -304.62298583984375, |
| "logps/rejected": -442.72808837890625, |
| "loss": 0.0438, |
| "rewards/accuracies": 0.9750000238418579, |
| "rewards/chosen": -9.49343490600586, |
| "rewards/margins": 11.810284614562988, |
| "rewards/rejected": -21.303720474243164, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.058350913416221555, |
| "grad_norm": 3.34375, |
| "learning_rate": 1.643854292311126e-05, |
| "logits/chosen": -1.4016748666763306, |
| "logits/rejected": -1.4312589168548584, |
| "logps/chosen": -321.81982421875, |
| "logps/rejected": -464.9969787597656, |
| "loss": 0.016, |
| "rewards/accuracies": 0.9906250238418579, |
| "rewards/chosen": -9.75760269165039, |
| "rewards/margins": 12.245366096496582, |
| "rewards/rejected": -22.00296974182129, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.05924861977647112, |
| "grad_norm": 8.125, |
| "learning_rate": 1.6438223879123157e-05, |
| "logits/chosen": -1.420204520225525, |
| "logits/rejected": -1.4521286487579346, |
| "logps/chosen": -334.3448486328125, |
| "logps/rejected": -486.096435546875, |
| "loss": 0.0395, |
| "rewards/accuracies": 0.971875011920929, |
| "rewards/chosen": -11.95421028137207, |
| "rewards/margins": 13.074414253234863, |
| "rewards/rejected": -25.02862548828125, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.060146326136720676, |
| "grad_norm": 7.4375, |
| "learning_rate": 1.6437899368329744e-05, |
| "logits/chosen": -1.4968700408935547, |
| "logits/rejected": -1.516984224319458, |
| "logps/chosen": -362.8880310058594, |
| "logps/rejected": -503.55047607421875, |
| "loss": 0.0443, |
| "rewards/accuracies": 0.96875, |
| "rewards/chosen": -13.416742324829102, |
| "rewards/margins": 12.286725997924805, |
| "rewards/rejected": -25.703466415405273, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.06104403249697024, |
| "grad_norm": 3.65625, |
| "learning_rate": 1.643756939101909e-05, |
| "logits/chosen": -1.4715522527694702, |
| "logits/rejected": -1.500880479812622, |
| "logps/chosen": -347.5955505371094, |
| "logps/rejected": -490.23345947265625, |
| "loss": 0.0188, |
| "rewards/accuracies": 0.981249988079071, |
| "rewards/chosen": -13.808749198913574, |
| "rewards/margins": 12.222585678100586, |
| "rewards/rejected": -26.031335830688477, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.061941738857219805, |
| "grad_norm": 0.0283203125, |
| "learning_rate": 1.6437233947484115e-05, |
| "logits/chosen": -1.4634774923324585, |
| "logits/rejected": -1.4903171062469482, |
| "logps/chosen": -346.1238708496094, |
| "logps/rejected": -480.40740966796875, |
| "loss": 0.026, |
| "rewards/accuracies": 0.9781249761581421, |
| "rewards/chosen": -13.692280769348145, |
| "rewards/margins": 11.461533546447754, |
| "rewards/rejected": -25.153812408447266, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.06283944521746937, |
| "grad_norm": 2.734375, |
| "learning_rate": 1.6436893038022587e-05, |
| "logits/chosen": -1.4172029495239258, |
| "logits/rejected": -1.442546010017395, |
| "logps/chosen": -339.7004089355469, |
| "logps/rejected": -476.07861328125, |
| "loss": 0.0222, |
| "rewards/accuracies": 0.984375, |
| "rewards/chosen": -13.242881774902344, |
| "rewards/margins": 11.535491943359375, |
| "rewards/rejected": -24.77837562561035, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.06373715157771893, |
| "grad_norm": 2.515625, |
| "learning_rate": 1.6436546662937136e-05, |
| "logits/chosen": -1.4132306575775146, |
| "logits/rejected": -1.438727855682373, |
| "logps/chosen": -340.2590637207031, |
| "logps/rejected": -478.34759521484375, |
| "loss": 0.03, |
| "rewards/accuracies": 0.981249988079071, |
| "rewards/chosen": -12.395490646362305, |
| "rewards/margins": 11.930428504943848, |
| "rewards/rejected": -24.325918197631836, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.06463485793796848, |
| "grad_norm": 1.9609375, |
| "learning_rate": 1.6436194822535237e-05, |
| "logits/chosen": -1.3696801662445068, |
| "logits/rejected": -1.4052057266235352, |
| "logps/chosen": -332.30072021484375, |
| "logps/rejected": -476.0782165527344, |
| "loss": 0.0272, |
| "rewards/accuracies": 0.981249988079071, |
| "rewards/chosen": -12.618162155151367, |
| "rewards/margins": 12.268811225891113, |
| "rewards/rejected": -24.886974334716797, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.06553256429821805, |
| "grad_norm": 1.421875, |
| "learning_rate": 1.643583751712921e-05, |
| "logits/chosen": -1.3992929458618164, |
| "logits/rejected": -1.4282127618789673, |
| "logps/chosen": -334.0565490722656, |
| "logps/rejected": -480.23785400390625, |
| "loss": 0.0185, |
| "rewards/accuracies": 0.9937499761581421, |
| "rewards/chosen": -11.715566635131836, |
| "rewards/margins": 12.651227951049805, |
| "rewards/rejected": -24.36679458618164, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.06643027065846761, |
| "grad_norm": 5.1875, |
| "learning_rate": 1.6435474747036243e-05, |
| "logits/chosen": -1.453920602798462, |
| "logits/rejected": -1.4755427837371826, |
| "logps/chosen": -322.3940124511719, |
| "logps/rejected": -467.23126220703125, |
| "loss": 0.0371, |
| "rewards/accuracies": 0.9781249761581421, |
| "rewards/chosen": -11.112676620483398, |
| "rewards/margins": 12.558187484741211, |
| "rewards/rejected": -23.67086410522461, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.06732797701871718, |
| "grad_norm": 3.84375, |
| "learning_rate": 1.643510651257836e-05, |
| "logits/chosen": -1.4459034204483032, |
| "logits/rejected": -1.4675936698913574, |
| "logps/chosen": -320.50347900390625, |
| "logps/rejected": -463.59014892578125, |
| "loss": 0.0144, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": -10.755430221557617, |
| "rewards/margins": 12.185154914855957, |
| "rewards/rejected": -22.94058609008789, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.06822568337896674, |
| "grad_norm": 0.0186767578125, |
| "learning_rate": 1.6434732814082442e-05, |
| "logits/chosen": -1.4478992223739624, |
| "logits/rejected": -1.4632583856582642, |
| "logps/chosen": -331.53289794921875, |
| "logps/rejected": -466.4090881347656, |
| "loss": 0.0205, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": -10.466972351074219, |
| "rewards/margins": 11.554471969604492, |
| "rewards/rejected": -22.021446228027344, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.0691233897392163, |
| "grad_norm": 5.21875, |
| "learning_rate": 1.6434353651880223e-05, |
| "logits/chosen": -1.4576263427734375, |
| "logits/rejected": -1.470090627670288, |
| "logps/chosen": -322.73773193359375, |
| "logps/rejected": -458.134521484375, |
| "loss": 0.0305, |
| "rewards/accuracies": 0.9781249761581421, |
| "rewards/chosen": -10.470837593078613, |
| "rewards/margins": 11.618741035461426, |
| "rewards/rejected": -22.089576721191406, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.07002109609946587, |
| "grad_norm": 2.875, |
| "learning_rate": 1.643396902630828e-05, |
| "logits/chosen": -1.4584577083587646, |
| "logits/rejected": -1.4607679843902588, |
| "logps/chosen": -317.8088073730469, |
| "logps/rejected": -451.45440673828125, |
| "loss": 0.0322, |
| "rewards/accuracies": 0.9781249761581421, |
| "rewards/chosen": -10.205190658569336, |
| "rewards/margins": 11.55008316040039, |
| "rewards/rejected": -21.75527572631836, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.07091880245971542, |
| "grad_norm": 6.96875, |
| "learning_rate": 1.6433578937708046e-05, |
| "logits/chosen": -1.4126781225204468, |
| "logits/rejected": -1.4239190816879272, |
| "logps/chosen": -322.39801025390625, |
| "logps/rejected": -458.4278259277344, |
| "loss": 0.0256, |
| "rewards/accuracies": 0.981249988079071, |
| "rewards/chosen": -9.876721382141113, |
| "rewards/margins": 11.59015941619873, |
| "rewards/rejected": -21.466880798339844, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.07181650881996499, |
| "grad_norm": 0.02001953125, |
| "learning_rate": 1.64331833864258e-05, |
| "logits/chosen": -1.4148705005645752, |
| "logits/rejected": -1.4251186847686768, |
| "logps/chosen": -316.4896545410156, |
| "logps/rejected": -450.53240966796875, |
| "loss": 0.0164, |
| "rewards/accuracies": 0.9906250238418579, |
| "rewards/chosen": -9.625123023986816, |
| "rewards/margins": 11.543768882751465, |
| "rewards/rejected": -21.168895721435547, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.07271421518021455, |
| "grad_norm": 0.890625, |
| "learning_rate": 1.643278237281267e-05, |
| "logits/chosen": -1.421555757522583, |
| "logits/rejected": -1.4265415668487549, |
| "logps/chosen": -308.0295104980469, |
| "logps/rejected": -445.5311584472656, |
| "loss": 0.0249, |
| "rewards/accuracies": 0.981249988079071, |
| "rewards/chosen": -9.17651653289795, |
| "rewards/margins": 11.756936073303223, |
| "rewards/rejected": -20.933452606201172, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.07361192154046411, |
| "grad_norm": 2.140625, |
| "learning_rate": 1.6432375897224637e-05, |
| "logits/chosen": -1.3315099477767944, |
| "logits/rejected": -1.3350989818572998, |
| "logps/chosen": -315.9861755371094, |
| "logps/rejected": -451.1835021972656, |
| "loss": 0.044, |
| "rewards/accuracies": 0.981249988079071, |
| "rewards/chosen": -9.11551284790039, |
| "rewards/margins": 11.56936264038086, |
| "rewards/rejected": -20.68487548828125, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.07450962790071368, |
| "grad_norm": 0.94140625, |
| "learning_rate": 1.6431963960022524e-05, |
| "logits/chosen": -1.2902719974517822, |
| "logits/rejected": -1.2910665273666382, |
| "logps/chosen": -309.9100341796875, |
| "logps/rejected": -442.4892578125, |
| "loss": 0.0278, |
| "rewards/accuracies": 0.984375, |
| "rewards/chosen": -9.109766006469727, |
| "rewards/margins": 11.337265968322754, |
| "rewards/rejected": -20.447031021118164, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.07540733426096324, |
| "grad_norm": 4.28125, |
| "learning_rate": 1.643154656157201e-05, |
| "logits/chosen": -1.2414597272872925, |
| "logits/rejected": -1.2512853145599365, |
| "logps/chosen": -301.22174072265625, |
| "logps/rejected": -416.2430725097656, |
| "loss": 0.0651, |
| "rewards/accuracies": 0.971875011920929, |
| "rewards/chosen": -9.233153343200684, |
| "rewards/margins": 9.558819770812988, |
| "rewards/rejected": -18.79197120666504, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.07630504062121281, |
| "grad_norm": 2.984375, |
| "learning_rate": 1.6431123702243618e-05, |
| "logits/chosen": -1.2505871057510376, |
| "logits/rejected": -1.2604036331176758, |
| "logps/chosen": -319.97369384765625, |
| "logps/rejected": -420.20184326171875, |
| "loss": 0.0256, |
| "rewards/accuracies": 0.981249988079071, |
| "rewards/chosen": -9.754773139953613, |
| "rewards/margins": 7.977179527282715, |
| "rewards/rejected": -17.731952667236328, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.07720274698146236, |
| "grad_norm": 2.75, |
| "learning_rate": 1.6430695382412714e-05, |
| "logits/chosen": -1.2662450075149536, |
| "logits/rejected": -1.2877540588378906, |
| "logps/chosen": -322.77532958984375, |
| "logps/rejected": -426.742919921875, |
| "loss": 0.0443, |
| "rewards/accuracies": 0.984375, |
| "rewards/chosen": -10.8510103225708, |
| "rewards/margins": 8.357492446899414, |
| "rewards/rejected": -19.208499908447266, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.07810045334171192, |
| "grad_norm": 0.039794921875, |
| "learning_rate": 1.6430261602459523e-05, |
| "logits/chosen": -1.291669487953186, |
| "logits/rejected": -1.3137457370758057, |
| "logps/chosen": -338.3268127441406, |
| "logps/rejected": -457.73077392578125, |
| "loss": 0.0177, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": -12.022209167480469, |
| "rewards/margins": 10.057284355163574, |
| "rewards/rejected": -22.07949447631836, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.07899815970196149, |
| "grad_norm": 7.125, |
| "learning_rate": 1.6429822362769104e-05, |
| "logits/chosen": -1.2740453481674194, |
| "logits/rejected": -1.2928683757781982, |
| "logps/chosen": -351.74859619140625, |
| "logps/rejected": -467.54345703125, |
| "loss": 0.0661, |
| "rewards/accuracies": 0.96875, |
| "rewards/chosen": -13.225227355957031, |
| "rewards/margins": 9.72153377532959, |
| "rewards/rejected": -22.946762084960938, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.07989586606221105, |
| "grad_norm": 5.0625, |
| "learning_rate": 1.642937766373137e-05, |
| "logits/chosen": -1.2580888271331787, |
| "logits/rejected": -1.2826154232025146, |
| "logps/chosen": -356.80303955078125, |
| "logps/rejected": -462.6808166503906, |
| "loss": 0.05, |
| "rewards/accuracies": 0.9750000238418579, |
| "rewards/chosen": -13.879257202148438, |
| "rewards/margins": 8.64616584777832, |
| "rewards/rejected": -22.525421142578125, |
| "step": 445 |
| }, |
| { |
| "epoch": 0.08079357242246062, |
| "grad_norm": 3.53125, |
| "learning_rate": 1.6428927505741077e-05, |
| "logits/chosen": -1.3274773359298706, |
| "logits/rejected": -1.3538029193878174, |
| "logps/chosen": -351.6380920410156, |
| "logps/rejected": -460.96075439453125, |
| "loss": 0.0209, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": -12.758635520935059, |
| "rewards/margins": 9.032510757446289, |
| "rewards/rejected": -21.79114532470703, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.08169127878271018, |
| "grad_norm": 2.140625, |
| "learning_rate": 1.642847188919783e-05, |
| "logits/chosen": -1.3372552394866943, |
| "logits/rejected": -1.3623110055923462, |
| "logps/chosen": -347.4330139160156, |
| "logps/rejected": -465.4501037597656, |
| "loss": 0.0277, |
| "rewards/accuracies": 0.981249988079071, |
| "rewards/chosen": -12.52314281463623, |
| "rewards/margins": 9.909950256347656, |
| "rewards/rejected": -22.43309211730957, |
| "step": 455 |
| }, |
| { |
| "epoch": 0.08258898514295973, |
| "grad_norm": 5.5, |
| "learning_rate": 1.6428010814506082e-05, |
| "logits/chosen": -1.3123576641082764, |
| "logits/rejected": -1.339634895324707, |
| "logps/chosen": -338.0999450683594, |
| "logps/rejected": -456.74310302734375, |
| "loss": 0.0419, |
| "rewards/accuracies": 0.96875, |
| "rewards/chosen": -13.015867233276367, |
| "rewards/margins": 9.875136375427246, |
| "rewards/rejected": -22.891002655029297, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.0834866915032093, |
| "grad_norm": 1.703125, |
| "learning_rate": 1.6427544282075123e-05, |
| "logits/chosen": -1.3849332332611084, |
| "logits/rejected": -1.4038164615631104, |
| "logps/chosen": -353.74908447265625, |
| "logps/rejected": -478.51287841796875, |
| "loss": 0.0462, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": -13.985809326171875, |
| "rewards/margins": 10.343815803527832, |
| "rewards/rejected": -24.32962417602539, |
| "step": 465 |
| }, |
| { |
| "epoch": 0.08438439786345886, |
| "grad_norm": 0.1669921875, |
| "learning_rate": 1.642707229231909e-05, |
| "logits/chosen": -1.3579334020614624, |
| "logits/rejected": -1.3771896362304688, |
| "logps/chosen": -369.145751953125, |
| "logps/rejected": -490.97100830078125, |
| "loss": 0.0146, |
| "rewards/accuracies": 0.9937499761581421, |
| "rewards/chosen": -15.570086479187012, |
| "rewards/margins": 9.97675895690918, |
| "rewards/rejected": -25.546846389770508, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.08528210422370842, |
| "grad_norm": 1.8203125, |
| "learning_rate": 1.6426594845656973e-05, |
| "logits/chosen": -1.355943202972412, |
| "logits/rejected": -1.3650095462799072, |
| "logps/chosen": -376.259521484375, |
| "logps/rejected": -499.44970703125, |
| "loss": 0.0148, |
| "rewards/accuracies": 0.9937499761581421, |
| "rewards/chosen": -15.661959648132324, |
| "rewards/margins": 10.384978294372559, |
| "rewards/rejected": -26.04693603515625, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.08617981058395799, |
| "grad_norm": 2.265625, |
| "learning_rate": 1.642611194251259e-05, |
| "logits/chosen": -1.355452299118042, |
| "logits/rejected": -1.3569139242172241, |
| "logps/chosen": -376.9379577636719, |
| "logps/rejected": -506.89666748046875, |
| "loss": 0.0385, |
| "rewards/accuracies": 0.971875011920929, |
| "rewards/chosen": -15.602932929992676, |
| "rewards/margins": 10.967208862304688, |
| "rewards/rejected": -26.570140838623047, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.08707751694420755, |
| "grad_norm": 1.71875, |
| "learning_rate": 1.642562358331462e-05, |
| "logits/chosen": -1.3472042083740234, |
| "logits/rejected": -1.3535155057907104, |
| "logps/chosen": -362.6130676269531, |
| "logps/rejected": -499.9092712402344, |
| "loss": 0.0131, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": -14.739013671875, |
| "rewards/margins": 11.621049880981445, |
| "rewards/rejected": -26.360065460205078, |
| "step": 485 |
| }, |
| { |
| "epoch": 0.08797522330445712, |
| "grad_norm": 1.1171875, |
| "learning_rate": 1.6425129768496577e-05, |
| "logits/chosen": -1.3245633840560913, |
| "logits/rejected": -1.3288484811782837, |
| "logps/chosen": -364.94903564453125, |
| "logps/rejected": -503.7896423339844, |
| "loss": 0.0225, |
| "rewards/accuracies": 0.984375, |
| "rewards/chosen": -14.195643424987793, |
| "rewards/margins": 11.918843269348145, |
| "rewards/rejected": -26.114486694335938, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.08887292966470667, |
| "grad_norm": 3.65625, |
| "learning_rate": 1.6424630498496813e-05, |
| "logits/chosen": -1.3164643049240112, |
| "logits/rejected": -1.3220335245132446, |
| "logps/chosen": -365.0807189941406, |
| "logps/rejected": -510.30340576171875, |
| "loss": 0.0186, |
| "rewards/accuracies": 0.984375, |
| "rewards/chosen": -14.422874450683594, |
| "rewards/margins": 12.447381973266602, |
| "rewards/rejected": -26.870258331298828, |
| "step": 495 |
| }, |
| { |
| "epoch": 0.08977063602495623, |
| "grad_norm": 2.203125, |
| "learning_rate": 1.6424125773758535e-05, |
| "logits/chosen": -1.418001413345337, |
| "logits/rejected": -1.4166367053985596, |
| "logps/chosen": -361.4501037597656, |
| "logps/rejected": -499.55902099609375, |
| "loss": 0.0316, |
| "rewards/accuracies": 0.984375, |
| "rewards/chosen": -14.319913864135742, |
| "rewards/margins": 11.92530632019043, |
| "rewards/rejected": -26.245220184326172, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.08977063602495623, |
| "eval_logits/chosen": -1.3433603048324585, |
| "eval_logits/rejected": -1.36442232131958, |
| "eval_logps/chosen": -369.4107666015625, |
| "eval_logps/rejected": -511.26239013671875, |
| "eval_loss": 0.014039273373782635, |
| "eval_rewards/accuracies": 0.9900000095367432, |
| "eval_rewards/chosen": -13.992281913757324, |
| "eval_rewards/margins": 12.298800468444824, |
| "eval_rewards/rejected": -26.29108238220215, |
| "eval_runtime": 10.3033, |
| "eval_samples_per_second": 19.411, |
| "eval_steps_per_second": 19.411, |
| "step": 500 |
| } |
| ], |
| "logging_steps": 5, |
| "max_steps": 16707, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 16, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|