| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 1.0, | |
| "eval_steps": 5000, | |
| "global_step": 20074, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.004981568197668626, | |
| "grad_norm": 81.31034088134766, | |
| "learning_rate": 1.9999863931243543e-05, | |
| "logits/chosen": -19.35576057434082, | |
| "logits/rejected": -19.391923904418945, | |
| "logps/chosen": -488.51171875, | |
| "logps/rejected": -382.52825927734375, | |
| "loss": 0.6551, | |
| "rewards/accuracies": 0.59375, | |
| "rewards/chosen": 0.4239501953125, | |
| "rewards/margins": 1.4092838764190674, | |
| "rewards/rejected": -0.9853336215019226, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.009963136395337252, | |
| "grad_norm": 27.6790828704834, | |
| "learning_rate": 1.9999455728677112e-05, | |
| "logits/chosen": -18.520322799682617, | |
| "logits/rejected": -18.58489227294922, | |
| "logps/chosen": -502.153564453125, | |
| "logps/rejected": -427.2685241699219, | |
| "loss": 1.088, | |
| "rewards/accuracies": 0.4699999988079071, | |
| "rewards/chosen": 1.2840694189071655, | |
| "rewards/margins": -0.12594786286354065, | |
| "rewards/rejected": 1.4100172519683838, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.014944704593005878, | |
| "grad_norm": 14.743182182312012, | |
| "learning_rate": 1.999877540340943e-05, | |
| "logits/chosen": -18.121265411376953, | |
| "logits/rejected": -17.966760635375977, | |
| "logps/chosen": -480.9696960449219, | |
| "logps/rejected": -391.3818359375, | |
| "loss": 0.846, | |
| "rewards/accuracies": 0.6100000143051147, | |
| "rewards/chosen": 2.156686305999756, | |
| "rewards/margins": 0.4148028492927551, | |
| "rewards/rejected": 1.7418835163116455, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.019926272790674503, | |
| "grad_norm": 0.3787066340446472, | |
| "learning_rate": 1.99978229739547e-05, | |
| "logits/chosen": -18.205398559570312, | |
| "logits/rejected": -18.042299270629883, | |
| "logps/chosen": -502.7016296386719, | |
| "logps/rejected": -388.99835205078125, | |
| "loss": 0.7988, | |
| "rewards/accuracies": 0.5799999833106995, | |
| "rewards/chosen": 2.600520372390747, | |
| "rewards/margins": 0.4970521926879883, | |
| "rewards/rejected": 2.1034679412841797, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.02490784098834313, | |
| "grad_norm": 0.6821377873420715, | |
| "learning_rate": 1.9996598466232097e-05, | |
| "logits/chosen": -18.351791381835938, | |
| "logits/rejected": -18.350332260131836, | |
| "logps/chosen": -495.239501953125, | |
| "logps/rejected": -396.9656677246094, | |
| "loss": 0.9516, | |
| "rewards/accuracies": 0.6399999856948853, | |
| "rewards/chosen": 2.7651679515838623, | |
| "rewards/margins": 0.4416518807411194, | |
| "rewards/rejected": 2.3235161304473877, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.029889409186011757, | |
| "grad_norm": 0.07361862808465958, | |
| "learning_rate": 1.9995101913565075e-05, | |
| "logits/chosen": -18.08759117126465, | |
| "logits/rejected": -18.078266143798828, | |
| "logps/chosen": -500.9162902832031, | |
| "logps/rejected": -413.58673095703125, | |
| "loss": 0.8741, | |
| "rewards/accuracies": 0.5799999833106995, | |
| "rewards/chosen": 3.083859443664551, | |
| "rewards/margins": 0.49825409054756165, | |
| "rewards/rejected": 2.5856053829193115, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.034870977383680384, | |
| "grad_norm": 50.79634475708008, | |
| "learning_rate": 1.9993333356680442e-05, | |
| "logits/chosen": -17.93349838256836, | |
| "logits/rejected": -17.859838485717773, | |
| "logps/chosen": -576.14501953125, | |
| "logps/rejected": -481.7210693359375, | |
| "loss": 0.9994, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": 2.9873363971710205, | |
| "rewards/margins": 0.3300691843032837, | |
| "rewards/rejected": 2.6572670936584473, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.03985254558134901, | |
| "grad_norm": 11.607032775878906, | |
| "learning_rate": 1.999129284370727e-05, | |
| "logits/chosen": -18.006515502929688, | |
| "logits/rejected": -17.87860107421875, | |
| "logps/chosen": -511.5252990722656, | |
| "logps/rejected": -448.138916015625, | |
| "loss": 1.0173, | |
| "rewards/accuracies": 0.5299999713897705, | |
| "rewards/chosen": 3.2953906059265137, | |
| "rewards/margins": 0.5231221318244934, | |
| "rewards/rejected": 2.772268772125244, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.04483411377901764, | |
| "grad_norm": 9.66622257232666, | |
| "learning_rate": 1.9988980430175565e-05, | |
| "logits/chosen": -17.94629669189453, | |
| "logits/rejected": -17.793624877929688, | |
| "logps/chosen": -471.1706237792969, | |
| "logps/rejected": -380.40625, | |
| "loss": 0.7616, | |
| "rewards/accuracies": 0.6600000262260437, | |
| "rewards/chosen": 3.2259023189544678, | |
| "rewards/margins": 0.9019778966903687, | |
| "rewards/rejected": 2.3239243030548096, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.04981568197668626, | |
| "grad_norm": 33.115692138671875, | |
| "learning_rate": 1.998639617901478e-05, | |
| "logits/chosen": -18.29867935180664, | |
| "logits/rejected": -18.21681785583496, | |
| "logps/chosen": -492.39471435546875, | |
| "logps/rejected": -397.0972900390625, | |
| "loss": 0.8836, | |
| "rewards/accuracies": 0.6399999856948853, | |
| "rewards/chosen": 4.284320831298828, | |
| "rewards/margins": 1.2680495977401733, | |
| "rewards/rejected": 3.0162715911865234, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.05479725017435489, | |
| "grad_norm": 14.794266700744629, | |
| "learning_rate": 1.998354016055208e-05, | |
| "logits/chosen": -17.866899490356445, | |
| "logits/rejected": -17.793582916259766, | |
| "logps/chosen": -512.371337890625, | |
| "logps/rejected": -424.1750793457031, | |
| "loss": 1.2634, | |
| "rewards/accuracies": 0.5600000023841858, | |
| "rewards/chosen": 3.433595895767212, | |
| "rewards/margins": 0.3185270428657532, | |
| "rewards/rejected": 3.1150686740875244, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.059778818372023514, | |
| "grad_norm": 177.02552795410156, | |
| "learning_rate": 1.998041245251044e-05, | |
| "logits/chosen": -18.298795700073242, | |
| "logits/rejected": -18.078033447265625, | |
| "logps/chosen": -464.45086669921875, | |
| "logps/rejected": -387.45428466796875, | |
| "loss": 1.0062, | |
| "rewards/accuracies": 0.5799999833106995, | |
| "rewards/chosen": 3.7040328979492188, | |
| "rewards/margins": 0.5431541800498962, | |
| "rewards/rejected": 3.1608786582946777, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.06476038656969214, | |
| "grad_norm": 19.352441787719727, | |
| "learning_rate": 1.997701314000653e-05, | |
| "logits/chosen": -18.20465660095215, | |
| "logits/rejected": -18.182998657226562, | |
| "logps/chosen": -489.5882873535156, | |
| "logps/rejected": -431.6067199707031, | |
| "loss": 0.8781, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": 4.215704917907715, | |
| "rewards/margins": 0.7704020738601685, | |
| "rewards/rejected": 3.445302963256836, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.06974195476736077, | |
| "grad_norm": 22.920259475708008, | |
| "learning_rate": 1.9973342315548398e-05, | |
| "logits/chosen": -18.116256713867188, | |
| "logits/rejected": -18.149843215942383, | |
| "logps/chosen": -447.0269775390625, | |
| "logps/rejected": -374.61383056640625, | |
| "loss": 1.0611, | |
| "rewards/accuracies": 0.5600000023841858, | |
| "rewards/chosen": 3.63736629486084, | |
| "rewards/margins": 0.6730349063873291, | |
| "rewards/rejected": 2.96433162689209, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.07472352296502939, | |
| "grad_norm": 20.65612030029297, | |
| "learning_rate": 1.9969400079032947e-05, | |
| "logits/chosen": -18.347074508666992, | |
| "logits/rejected": -18.040178298950195, | |
| "logps/chosen": -453.944091796875, | |
| "logps/rejected": -380.4056396484375, | |
| "loss": 1.0204, | |
| "rewards/accuracies": 0.5799999833106995, | |
| "rewards/chosen": 4.042062282562256, | |
| "rewards/margins": 0.9009463787078857, | |
| "rewards/rejected": 3.1411163806915283, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.07970509116269801, | |
| "grad_norm": 50.09711456298828, | |
| "learning_rate": 1.9965186537743215e-05, | |
| "logits/chosen": -18.355621337890625, | |
| "logits/rejected": -18.051054000854492, | |
| "logps/chosen": -502.7710876464844, | |
| "logps/rejected": -419.5497741699219, | |
| "loss": 1.1749, | |
| "rewards/accuracies": 0.5899999737739563, | |
| "rewards/chosen": 3.215543508529663, | |
| "rewards/margins": 0.48448604345321655, | |
| "rewards/rejected": 2.731057643890381, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.08468665936036664, | |
| "grad_norm": 0.4332411289215088, | |
| "learning_rate": 1.9960701806345472e-05, | |
| "logits/chosen": -18.210161209106445, | |
| "logits/rejected": -18.06623077392578, | |
| "logps/chosen": -449.0904235839844, | |
| "logps/rejected": -365.96173095703125, | |
| "loss": 0.7165, | |
| "rewards/accuracies": 0.6399999856948853, | |
| "rewards/chosen": 4.172327995300293, | |
| "rewards/margins": 1.4230843782424927, | |
| "rewards/rejected": 2.749243974685669, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.08966822755803527, | |
| "grad_norm": 5.466333389282227, | |
| "learning_rate": 1.9955946006886082e-05, | |
| "logits/chosen": -18.5748348236084, | |
| "logits/rejected": -18.16517448425293, | |
| "logps/chosen": -438.857421875, | |
| "logps/rejected": -416.3319396972656, | |
| "loss": 0.8766, | |
| "rewards/accuracies": 0.5699999928474426, | |
| "rewards/chosen": 4.177321910858154, | |
| "rewards/margins": 0.9888943433761597, | |
| "rewards/rejected": 3.188427686691284, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.0946497957557039, | |
| "grad_norm": 82.25857543945312, | |
| "learning_rate": 1.995091926878819e-05, | |
| "logits/chosen": -18.491161346435547, | |
| "logits/rejected": -18.283767700195312, | |
| "logps/chosen": -461.55023193359375, | |
| "logps/rejected": -394.8086242675781, | |
| "loss": 1.0777, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": 3.3734805583953857, | |
| "rewards/margins": 0.7351935505867004, | |
| "rewards/rejected": 2.638287305831909, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.09963136395337252, | |
| "grad_norm": 9.021427154541016, | |
| "learning_rate": 1.9945621728848194e-05, | |
| "logits/chosen": -18.71115493774414, | |
| "logits/rejected": -18.314943313598633, | |
| "logps/chosen": -466.3949890136719, | |
| "logps/rejected": -393.8585205078125, | |
| "loss": 0.6798, | |
| "rewards/accuracies": 0.7300000190734863, | |
| "rewards/chosen": 3.843343496322632, | |
| "rewards/margins": 1.1383906602859497, | |
| "rewards/rejected": 2.7049529552459717, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.10461293215104114, | |
| "grad_norm": 146.21475219726562, | |
| "learning_rate": 1.9940053531232028e-05, | |
| "logits/chosen": -18.590173721313477, | |
| "logits/rejected": -18.441553115844727, | |
| "logps/chosen": -454.3846435546875, | |
| "logps/rejected": -387.5030212402344, | |
| "loss": 1.1674, | |
| "rewards/accuracies": 0.5699999928474426, | |
| "rewards/chosen": 4.06519079208374, | |
| "rewards/margins": 0.5631230473518372, | |
| "rewards/rejected": 3.5020673274993896, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.10959450034870978, | |
| "grad_norm": 4.808138847351074, | |
| "learning_rate": 1.9934214827471244e-05, | |
| "logits/chosen": -18.621475219726562, | |
| "logits/rejected": -18.35665512084961, | |
| "logps/chosen": -450.15582275390625, | |
| "logps/rejected": -386.0933532714844, | |
| "loss": 1.0704, | |
| "rewards/accuracies": 0.5699999928474426, | |
| "rewards/chosen": 3.885467290878296, | |
| "rewards/margins": 0.752030611038208, | |
| "rewards/rejected": 3.1334362030029297, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.1145760685463784, | |
| "grad_norm": 5.842870235443115, | |
| "learning_rate": 1.9928105776458864e-05, | |
| "logits/chosen": -18.336530685424805, | |
| "logits/rejected": -18.11532211303711, | |
| "logps/chosen": -466.25738525390625, | |
| "logps/rejected": -393.3591003417969, | |
| "loss": 1.1451, | |
| "rewards/accuracies": 0.5799999833106995, | |
| "rewards/chosen": 3.678475856781006, | |
| "rewards/margins": 0.4539036452770233, | |
| "rewards/rejected": 3.22457218170166, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 0.11955763674404703, | |
| "grad_norm": 37.61786651611328, | |
| "learning_rate": 1.9921726544445084e-05, | |
| "logits/chosen": -18.296964645385742, | |
| "logits/rejected": -18.364625930786133, | |
| "logps/chosen": -467.84027099609375, | |
| "logps/rejected": -397.1224670410156, | |
| "loss": 0.8784, | |
| "rewards/accuracies": 0.6899999976158142, | |
| "rewards/chosen": 3.5043909549713135, | |
| "rewards/margins": 1.0125629901885986, | |
| "rewards/rejected": 2.4918274879455566, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.12453920494171565, | |
| "grad_norm": 49.04602813720703, | |
| "learning_rate": 1.9915077305032748e-05, | |
| "logits/chosen": -18.40894317626953, | |
| "logits/rejected": -18.2955322265625, | |
| "logps/chosen": -503.1138610839844, | |
| "logps/rejected": -375.34442138671875, | |
| "loss": 0.9985, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": 4.6265459060668945, | |
| "rewards/margins": 1.7104928493499756, | |
| "rewards/rejected": 2.91605281829834, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.1295207731393843, | |
| "grad_norm": 16.836668014526367, | |
| "learning_rate": 1.9908158239172596e-05, | |
| "logits/chosen": -18.674049377441406, | |
| "logits/rejected": -18.514965057373047, | |
| "logps/chosen": -455.519775390625, | |
| "logps/rejected": -374.22210693359375, | |
| "loss": 0.9035, | |
| "rewards/accuracies": 0.6800000071525574, | |
| "rewards/chosen": 4.500489711761475, | |
| "rewards/margins": 1.446923017501831, | |
| "rewards/rejected": 3.0535662174224854, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 0.1345023413370529, | |
| "grad_norm": 0.04983401298522949, | |
| "learning_rate": 1.990096953515836e-05, | |
| "logits/chosen": -18.647964477539062, | |
| "logits/rejected": -18.637880325317383, | |
| "logps/chosen": -465.9200439453125, | |
| "logps/rejected": -411.40838623046875, | |
| "loss": 1.2207, | |
| "rewards/accuracies": 0.5899999737739563, | |
| "rewards/chosen": 3.8630013465881348, | |
| "rewards/margins": 0.7057845592498779, | |
| "rewards/rejected": 3.157216787338257, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 0.13948390953472153, | |
| "grad_norm": 0.5644310116767883, | |
| "learning_rate": 1.9893511388621652e-05, | |
| "logits/chosen": -18.66870880126953, | |
| "logits/rejected": -18.76462745666504, | |
| "logps/chosen": -513.6793823242188, | |
| "logps/rejected": -469.8594055175781, | |
| "loss": 1.5471, | |
| "rewards/accuracies": 0.6100000143051147, | |
| "rewards/chosen": 3.391056537628174, | |
| "rewards/margins": 0.3123472332954407, | |
| "rewards/rejected": 3.078709363937378, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 0.14446547773239016, | |
| "grad_norm": 1.6863278150558472, | |
| "learning_rate": 1.9885784002526616e-05, | |
| "logits/chosen": -18.729633331298828, | |
| "logits/rejected": -19.068260192871094, | |
| "logps/chosen": -447.2772521972656, | |
| "logps/rejected": -352.67791748046875, | |
| "loss": 1.0868, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": 3.924001693725586, | |
| "rewards/margins": 0.8989758491516113, | |
| "rewards/rejected": 3.0250258445739746, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 0.14944704593005878, | |
| "grad_norm": 0.10964024066925049, | |
| "learning_rate": 1.987778758716441e-05, | |
| "logits/chosen": -19.014976501464844, | |
| "logits/rejected": -19.501911163330078, | |
| "logps/chosen": -475.1939392089844, | |
| "logps/rejected": -385.38238525390625, | |
| "loss": 1.0756, | |
| "rewards/accuracies": 0.5600000023841858, | |
| "rewards/chosen": 4.359086036682129, | |
| "rewards/margins": 1.1548995971679688, | |
| "rewards/rejected": 3.2041866779327393, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.1544286141277274, | |
| "grad_norm": 7.479519367218018, | |
| "learning_rate": 1.98695223601475e-05, | |
| "logits/chosen": -18.8636531829834, | |
| "logits/rejected": -19.16086196899414, | |
| "logps/chosen": -484.1092529296875, | |
| "logps/rejected": -399.1905212402344, | |
| "loss": 0.8348, | |
| "rewards/accuracies": 0.7099999785423279, | |
| "rewards/chosen": 4.458531379699707, | |
| "rewards/margins": 1.7625274658203125, | |
| "rewards/rejected": 2.6960039138793945, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 0.15941018232539603, | |
| "grad_norm": 15.998089790344238, | |
| "learning_rate": 1.986098854640371e-05, | |
| "logits/chosen": -18.937522888183594, | |
| "logits/rejected": -19.118017196655273, | |
| "logps/chosen": -463.34149169921875, | |
| "logps/rejected": -415.29827880859375, | |
| "loss": 1.298, | |
| "rewards/accuracies": 0.6200000047683716, | |
| "rewards/chosen": 4.227731227874756, | |
| "rewards/margins": 0.5569795370101929, | |
| "rewards/rejected": 3.6707510948181152, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 0.16439175052306465, | |
| "grad_norm": 0.7655884027481079, | |
| "learning_rate": 1.9852186378170136e-05, | |
| "logits/chosen": -18.893104553222656, | |
| "logits/rejected": -19.257871627807617, | |
| "logps/chosen": -531.3560791015625, | |
| "logps/rejected": -465.7754821777344, | |
| "loss": 1.1944, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": 4.285092353820801, | |
| "rewards/margins": 1.1729285717010498, | |
| "rewards/rejected": 3.1121633052825928, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 0.16937331872073327, | |
| "grad_norm": 10.586421012878418, | |
| "learning_rate": 1.9843116094986783e-05, | |
| "logits/chosen": -18.89116859436035, | |
| "logits/rejected": -19.32411003112793, | |
| "logps/chosen": -466.8319091796875, | |
| "logps/rejected": -388.9800109863281, | |
| "loss": 0.8162, | |
| "rewards/accuracies": 0.6200000047683716, | |
| "rewards/chosen": 4.402754306793213, | |
| "rewards/margins": 1.1992639303207397, | |
| "rewards/rejected": 3.203490972518921, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 0.17435488691840192, | |
| "grad_norm": 0.0021288192365318537, | |
| "learning_rate": 1.983377794369009e-05, | |
| "logits/chosen": -18.90306854248047, | |
| "logits/rejected": -19.6688289642334, | |
| "logps/chosen": -506.9422912597656, | |
| "logps/rejected": -422.39703369140625, | |
| "loss": 0.9919, | |
| "rewards/accuracies": 0.5600000023841858, | |
| "rewards/chosen": 4.193739414215088, | |
| "rewards/margins": 1.1028869152069092, | |
| "rewards/rejected": 3.0908522605895996, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.17933645511607055, | |
| "grad_norm": 72.208251953125, | |
| "learning_rate": 1.982417217840618e-05, | |
| "logits/chosen": -19.198213577270508, | |
| "logits/rejected": -20.100387573242188, | |
| "logps/chosen": -498.4687805175781, | |
| "logps/rejected": -383.0224914550781, | |
| "loss": 0.9324, | |
| "rewards/accuracies": 0.6600000262260437, | |
| "rewards/chosen": 4.158178806304932, | |
| "rewards/margins": 1.5135530233383179, | |
| "rewards/rejected": 2.6446259021759033, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 0.18431802331373917, | |
| "grad_norm": 54.43558120727539, | |
| "learning_rate": 1.9814299060543965e-05, | |
| "logits/chosen": -19.100000381469727, | |
| "logits/rejected": -20.164613723754883, | |
| "logps/chosen": -523.7534790039062, | |
| "logps/rejected": -420.5929260253906, | |
| "loss": 1.0645, | |
| "rewards/accuracies": 0.6200000047683716, | |
| "rewards/chosen": 4.555855751037598, | |
| "rewards/margins": 1.3967076539993286, | |
| "rewards/rejected": 3.1591484546661377, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 0.1892995915114078, | |
| "grad_norm": 44.80778503417969, | |
| "learning_rate": 1.980415885878801e-05, | |
| "logits/chosen": -19.23442840576172, | |
| "logits/rejected": -20.248851776123047, | |
| "logps/chosen": -470.9892578125, | |
| "logps/rejected": -387.8487548828125, | |
| "loss": 1.1345, | |
| "rewards/accuracies": 0.6299999952316284, | |
| "rewards/chosen": 4.9033355712890625, | |
| "rewards/margins": 1.5088270902633667, | |
| "rewards/rejected": 3.3945086002349854, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 0.19428115970907642, | |
| "grad_norm": 0.029925603419542313, | |
| "learning_rate": 1.979375184909125e-05, | |
| "logits/chosen": -19.161788940429688, | |
| "logits/rejected": -20.242706298828125, | |
| "logps/chosen": -451.88165283203125, | |
| "logps/rejected": -377.80078125, | |
| "loss": 1.0498, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": 3.468371868133545, | |
| "rewards/margins": 1.5844755172729492, | |
| "rewards/rejected": 1.8838963508605957, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 0.19926272790674504, | |
| "grad_norm": 2.616316795349121, | |
| "learning_rate": 1.9783078314667465e-05, | |
| "logits/chosen": -19.053321838378906, | |
| "logits/rejected": -20.00080108642578, | |
| "logps/chosen": -502.9007568359375, | |
| "logps/rejected": -397.90020751953125, | |
| "loss": 0.9393, | |
| "rewards/accuracies": 0.6600000262260437, | |
| "rewards/chosen": 4.124849796295166, | |
| "rewards/margins": 1.3231381177902222, | |
| "rewards/rejected": 2.801711320877075, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.20424429610441366, | |
| "grad_norm": 44.81772232055664, | |
| "learning_rate": 1.9772138545983554e-05, | |
| "logits/chosen": -18.997940063476562, | |
| "logits/rejected": -19.999465942382812, | |
| "logps/chosen": -494.4920349121094, | |
| "logps/rejected": -401.1126708984375, | |
| "loss": 0.9059, | |
| "rewards/accuracies": 0.6399999856948853, | |
| "rewards/chosen": 3.8888561725616455, | |
| "rewards/margins": 1.703158974647522, | |
| "rewards/rejected": 2.185697317123413, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 0.2092258643020823, | |
| "grad_norm": 19.425662994384766, | |
| "learning_rate": 1.9760932840751663e-05, | |
| "logits/chosen": -18.9016056060791, | |
| "logits/rejected": -19.416828155517578, | |
| "logps/chosen": -483.6650390625, | |
| "logps/rejected": -388.0088195800781, | |
| "loss": 0.8963, | |
| "rewards/accuracies": 0.6299999952316284, | |
| "rewards/chosen": 5.2860283851623535, | |
| "rewards/margins": 1.5827534198760986, | |
| "rewards/rejected": 3.703275442123413, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 0.2142074324997509, | |
| "grad_norm": 124.74931335449219, | |
| "learning_rate": 1.9749461503921074e-05, | |
| "logits/chosen": -18.898042678833008, | |
| "logits/rejected": -19.673877716064453, | |
| "logps/chosen": -497.69476318359375, | |
| "logps/rejected": -364.8540954589844, | |
| "loss": 1.0963, | |
| "rewards/accuracies": 0.6299999952316284, | |
| "rewards/chosen": 4.136105537414551, | |
| "rewards/margins": 1.3833444118499756, | |
| "rewards/rejected": 2.752761125564575, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 0.21918900069741956, | |
| "grad_norm": 5.577426433563232, | |
| "learning_rate": 1.973772484766989e-05, | |
| "logits/chosen": -18.805566787719727, | |
| "logits/rejected": -19.62226104736328, | |
| "logps/chosen": -463.9582824707031, | |
| "logps/rejected": -351.70867919921875, | |
| "loss": 1.0113, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": 4.1188740730285645, | |
| "rewards/margins": 1.5241186618804932, | |
| "rewards/rejected": 2.5947556495666504, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 0.22417056889508818, | |
| "grad_norm": 0.4124658405780792, | |
| "learning_rate": 1.9725723191396557e-05, | |
| "logits/chosen": -18.83307647705078, | |
| "logits/rejected": -19.278696060180664, | |
| "logps/chosen": -466.99859619140625, | |
| "logps/rejected": -398.4786682128906, | |
| "loss": 1.622, | |
| "rewards/accuracies": 0.5899999737739563, | |
| "rewards/chosen": 4.280797958374023, | |
| "rewards/margins": 0.31094062328338623, | |
| "rewards/rejected": 3.9698569774627686, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.2291521370927568, | |
| "grad_norm": 46.63705825805664, | |
| "learning_rate": 1.971345686171116e-05, | |
| "logits/chosen": -18.672901153564453, | |
| "logits/rejected": -19.950056076049805, | |
| "logps/chosen": -500.7174072265625, | |
| "logps/rejected": -417.15191650390625, | |
| "loss": 0.9252, | |
| "rewards/accuracies": 0.6100000143051147, | |
| "rewards/chosen": 4.5458478927612305, | |
| "rewards/margins": 1.576623558998108, | |
| "rewards/rejected": 2.969224452972412, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 0.23413370529042543, | |
| "grad_norm": 18.882240295410156, | |
| "learning_rate": 1.9700926192426554e-05, | |
| "logits/chosen": -19.082120895385742, | |
| "logits/rejected": -20.37308120727539, | |
| "logps/chosen": -429.0272521972656, | |
| "logps/rejected": -354.2383728027344, | |
| "loss": 1.1165, | |
| "rewards/accuracies": 0.6299999952316284, | |
| "rewards/chosen": 3.655416965484619, | |
| "rewards/margins": 1.2390317916870117, | |
| "rewards/rejected": 2.4163851737976074, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 0.23911527348809405, | |
| "grad_norm": 336.6611633300781, | |
| "learning_rate": 1.9688131524549242e-05, | |
| "logits/chosen": -19.020198822021484, | |
| "logits/rejected": -19.45013999938965, | |
| "logps/chosen": -459.5777587890625, | |
| "logps/rejected": -409.13922119140625, | |
| "loss": 1.2777, | |
| "rewards/accuracies": 0.5600000023841858, | |
| "rewards/chosen": 2.606769323348999, | |
| "rewards/margins": 0.6296383142471313, | |
| "rewards/rejected": 1.9771310091018677, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 0.24409684168576268, | |
| "grad_norm": 72.95674133300781, | |
| "learning_rate": 1.9675073206270148e-05, | |
| "logits/chosen": -18.523130416870117, | |
| "logits/rejected": -19.362272262573242, | |
| "logps/chosen": -499.9466552734375, | |
| "logps/rejected": -384.0169982910156, | |
| "loss": 0.9484, | |
| "rewards/accuracies": 0.6600000262260437, | |
| "rewards/chosen": 4.083195209503174, | |
| "rewards/margins": 1.7932839393615723, | |
| "rewards/rejected": 2.2899110317230225, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 0.2490784098834313, | |
| "grad_norm": 7.091724395751953, | |
| "learning_rate": 1.9661751592955086e-05, | |
| "logits/chosen": -18.576244354248047, | |
| "logits/rejected": -19.872777938842773, | |
| "logps/chosen": -543.4857177734375, | |
| "logps/rejected": -440.7587890625, | |
| "loss": 0.9586, | |
| "rewards/accuracies": 0.6600000262260437, | |
| "rewards/chosen": 4.305512428283691, | |
| "rewards/margins": 1.4709885120391846, | |
| "rewards/rejected": 2.8345236778259277, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.2490784098834313, | |
| "eval_logits/chosen": -21.543413162231445, | |
| "eval_logits/rejected": -22.481142044067383, | |
| "eval_logps/chosen": -475.4376525878906, | |
| "eval_logps/rejected": -402.1141052246094, | |
| "eval_loss": 1.169881820678711, | |
| "eval_rewards/accuracies": 0.6557591557502747, | |
| "eval_rewards/chosen": 4.376668930053711, | |
| "eval_rewards/margins": 1.5471277236938477, | |
| "eval_rewards/rejected": 2.829540729522705, | |
| "eval_runtime": 473.1936, | |
| "eval_samples_per_second": 3.216, | |
| "eval_steps_per_second": 0.404, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.2540599780810999, | |
| "grad_norm": 82.43436431884766, | |
| "learning_rate": 1.9648167047135133e-05, | |
| "logits/chosen": -19.058635711669922, | |
| "logits/rejected": -20.134428024291992, | |
| "logps/chosen": -495.65887451171875, | |
| "logps/rejected": -419.3161315917969, | |
| "loss": 1.1698, | |
| "rewards/accuracies": 0.6600000262260437, | |
| "rewards/chosen": 3.87038516998291, | |
| "rewards/margins": 1.3453155755996704, | |
| "rewards/rejected": 2.52506947517395, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 0.2590415462787686, | |
| "grad_norm": 3.7819454669952393, | |
| "learning_rate": 1.9634319938496742e-05, | |
| "logits/chosen": -19.017601013183594, | |
| "logits/rejected": -20.623193740844727, | |
| "logps/chosen": -478.9990539550781, | |
| "logps/rejected": -395.05450439453125, | |
| "loss": 1.1279, | |
| "rewards/accuracies": 0.6700000166893005, | |
| "rewards/chosen": 3.3154103755950928, | |
| "rewards/margins": 1.5955133438110352, | |
| "rewards/rejected": 1.7198967933654785, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 0.26402311447643717, | |
| "grad_norm": 1.351638674736023, | |
| "learning_rate": 1.962021064387168e-05, | |
| "logits/chosen": -18.885652542114258, | |
| "logits/rejected": -19.914079666137695, | |
| "logps/chosen": -510.6768493652344, | |
| "logps/rejected": -441.775634765625, | |
| "loss": 1.1382, | |
| "rewards/accuracies": 0.6100000143051147, | |
| "rewards/chosen": 4.05587911605835, | |
| "rewards/margins": 1.2654640674591064, | |
| "rewards/rejected": 2.790414810180664, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 0.2690046826741058, | |
| "grad_norm": 32.41090393066406, | |
| "learning_rate": 1.9605839547226785e-05, | |
| "logits/chosen": -19.33073616027832, | |
| "logits/rejected": -20.477949142456055, | |
| "logps/chosen": -492.5516052246094, | |
| "logps/rejected": -420.31036376953125, | |
| "loss": 1.0482, | |
| "rewards/accuracies": 0.6800000071525574, | |
| "rewards/chosen": 3.8395843505859375, | |
| "rewards/margins": 1.6333999633789062, | |
| "rewards/rejected": 2.206184148788452, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 0.2739862508717744, | |
| "grad_norm": 32.58820724487305, | |
| "learning_rate": 1.9591207039653507e-05, | |
| "logits/chosen": -19.26167106628418, | |
| "logits/rejected": -20.89728546142578, | |
| "logps/chosen": -438.7798156738281, | |
| "logps/rejected": -363.2562255859375, | |
| "loss": 1.243, | |
| "rewards/accuracies": 0.6200000047683716, | |
| "rewards/chosen": 3.1530792713165283, | |
| "rewards/margins": 1.1595901250839233, | |
| "rewards/rejected": 1.9934889078140259, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 0.27896781906944307, | |
| "grad_norm": 93.86406707763672, | |
| "learning_rate": 1.9576313519357265e-05, | |
| "logits/chosen": -19.064878463745117, | |
| "logits/rejected": -20.905057907104492, | |
| "logps/chosen": -519.572509765625, | |
| "logps/rejected": -442.0079040527344, | |
| "loss": 1.2293, | |
| "rewards/accuracies": 0.6399999856948853, | |
| "rewards/chosen": 3.4798271656036377, | |
| "rewards/margins": 1.09993577003479, | |
| "rewards/rejected": 2.3798913955688477, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 0.28394938726711166, | |
| "grad_norm": 26.577350616455078, | |
| "learning_rate": 1.9561159391646618e-05, | |
| "logits/chosen": -19.34862518310547, | |
| "logits/rejected": -22.218530654907227, | |
| "logps/chosen": -500.4524230957031, | |
| "logps/rejected": -402.18389892578125, | |
| "loss": 0.9679, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": 3.561279535293579, | |
| "rewards/margins": 1.6947131156921387, | |
| "rewards/rejected": 1.8665661811828613, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 0.2889309554647803, | |
| "grad_norm": 20.811817169189453, | |
| "learning_rate": 1.9545745068922225e-05, | |
| "logits/chosen": -19.238685607910156, | |
| "logits/rejected": -20.936817169189453, | |
| "logps/chosen": -499.0253601074219, | |
| "logps/rejected": -417.6446533203125, | |
| "loss": 1.1599, | |
| "rewards/accuracies": 0.5299999713897705, | |
| "rewards/chosen": 4.803900241851807, | |
| "rewards/margins": 1.0667366981506348, | |
| "rewards/rejected": 3.7371630668640137, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 0.29391252366244897, | |
| "grad_norm": 23.467056274414062, | |
| "learning_rate": 1.9530070970665638e-05, | |
| "logits/chosen": -19.428844451904297, | |
| "logits/rejected": -21.77304458618164, | |
| "logps/chosen": -498.2739562988281, | |
| "logps/rejected": -398.3455810546875, | |
| "loss": 0.9347, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": 4.099443435668945, | |
| "rewards/margins": 1.4808638095855713, | |
| "rewards/rejected": 2.6185789108276367, | |
| "step": 5900 | |
| }, | |
| { | |
| "epoch": 0.29889409186011756, | |
| "grad_norm": 44.77325439453125, | |
| "learning_rate": 1.951413752342786e-05, | |
| "logits/chosen": -19.230180740356445, | |
| "logits/rejected": -21.40241050720215, | |
| "logps/chosen": -522.796630859375, | |
| "logps/rejected": -409.90936279296875, | |
| "loss": 1.0653, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": 3.7546486854553223, | |
| "rewards/margins": 2.06999135017395, | |
| "rewards/rejected": 1.6846575736999512, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.3038756600577862, | |
| "grad_norm": 0.1034678965806961, | |
| "learning_rate": 1.949794516081777e-05, | |
| "logits/chosen": -19.376697540283203, | |
| "logits/rejected": -21.691804885864258, | |
| "logps/chosen": -482.21807861328125, | |
| "logps/rejected": -379.0809326171875, | |
| "loss": 0.954, | |
| "rewards/accuracies": 0.6600000262260437, | |
| "rewards/chosen": 2.9128754138946533, | |
| "rewards/margins": 1.5702927112579346, | |
| "rewards/rejected": 1.3425830602645874, | |
| "step": 6100 | |
| }, | |
| { | |
| "epoch": 0.3088572282554548, | |
| "grad_norm": 8.153312683105469, | |
| "learning_rate": 1.9481494323490292e-05, | |
| "logits/chosen": -20.608989715576172, | |
| "logits/rejected": -25.289657592773438, | |
| "logps/chosen": -456.2762756347656, | |
| "logps/rejected": -363.2749938964844, | |
| "loss": 1.0848, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": 2.7271530628204346, | |
| "rewards/margins": 1.8869810104370117, | |
| "rewards/rejected": 0.8401720523834229, | |
| "step": 6200 | |
| }, | |
| { | |
| "epoch": 0.31383879645312346, | |
| "grad_norm": 173.4281463623047, | |
| "learning_rate": 1.9464785459134422e-05, | |
| "logits/chosen": -20.402162551879883, | |
| "logits/rejected": -26.399858474731445, | |
| "logps/chosen": -496.73199462890625, | |
| "logps/rejected": -371.1173400878906, | |
| "loss": 1.0173, | |
| "rewards/accuracies": 0.6600000262260437, | |
| "rewards/chosen": 3.2006542682647705, | |
| "rewards/margins": 2.35373592376709, | |
| "rewards/rejected": 0.8469181060791016, | |
| "step": 6300 | |
| }, | |
| { | |
| "epoch": 0.31882036465079205, | |
| "grad_norm": 71.35045623779297, | |
| "learning_rate": 1.9447819022461036e-05, | |
| "logits/chosen": -20.591211318969727, | |
| "logits/rejected": -21.949787139892578, | |
| "logps/chosen": -505.4292297363281, | |
| "logps/rejected": -457.5049133300781, | |
| "loss": 1.334, | |
| "rewards/accuracies": 0.5899999737739563, | |
| "rewards/chosen": 3.6680984497070312, | |
| "rewards/margins": 1.0007195472717285, | |
| "rewards/rejected": 2.6673781871795654, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 0.3238019328484607, | |
| "grad_norm": 71.10392761230469, | |
| "learning_rate": 1.9430595475190528e-05, | |
| "logits/chosen": -20.5976505279541, | |
| "logits/rejected": -22.98895263671875, | |
| "logps/chosen": -485.20758056640625, | |
| "logps/rejected": -426.62078857421875, | |
| "loss": 1.1786, | |
| "rewards/accuracies": 0.6299999952316284, | |
| "rewards/chosen": 3.1840951442718506, | |
| "rewards/margins": 1.0180020332336426, | |
| "rewards/rejected": 2.166092872619629, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 0.3287835010461293, | |
| "grad_norm": 20.788406372070312, | |
| "learning_rate": 1.9413115286040228e-05, | |
| "logits/chosen": -20.659372329711914, | |
| "logits/rejected": -24.63149642944336, | |
| "logps/chosen": -496.0128173828125, | |
| "logps/rejected": -426.76092529296875, | |
| "loss": 1.2417, | |
| "rewards/accuracies": 0.6100000143051147, | |
| "rewards/chosen": 2.6715264320373535, | |
| "rewards/margins": 1.3509292602539062, | |
| "rewards/rejected": 1.3205969333648682, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 0.33376506924379795, | |
| "grad_norm": 47.43177032470703, | |
| "learning_rate": 1.9395378930711654e-05, | |
| "logits/chosen": -21.26150894165039, | |
| "logits/rejected": -27.082225799560547, | |
| "logps/chosen": -484.2604675292969, | |
| "logps/rejected": -407.31512451171875, | |
| "loss": 1.0294, | |
| "rewards/accuracies": 0.7099999785423279, | |
| "rewards/chosen": 2.974027633666992, | |
| "rewards/margins": 2.4509451389312744, | |
| "rewards/rejected": 0.5230829119682312, | |
| "step": 6700 | |
| }, | |
| { | |
| "epoch": 0.33874663744146655, | |
| "grad_norm": 29.3293514251709, | |
| "learning_rate": 1.9377386891877572e-05, | |
| "logits/chosen": -20.902864456176758, | |
| "logits/rejected": -24.1262264251709, | |
| "logps/chosen": -505.2213134765625, | |
| "logps/rejected": -438.3580017089844, | |
| "loss": 1.5894, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": 1.993202567100525, | |
| "rewards/margins": 0.9017642736434937, | |
| "rewards/rejected": 1.0914379358291626, | |
| "step": 6800 | |
| }, | |
| { | |
| "epoch": 0.3437282056391352, | |
| "grad_norm": 85.70783996582031, | |
| "learning_rate": 1.9359139659168845e-05, | |
| "logits/chosen": -19.933032989501953, | |
| "logits/rejected": -23.047008514404297, | |
| "logps/chosen": -496.6371154785156, | |
| "logps/rejected": -435.8622131347656, | |
| "loss": 1.0725, | |
| "rewards/accuracies": 0.6399999856948853, | |
| "rewards/chosen": 1.8428994417190552, | |
| "rewards/margins": 1.234021782875061, | |
| "rewards/rejected": 0.6088778376579285, | |
| "step": 6900 | |
| }, | |
| { | |
| "epoch": 0.34870977383680385, | |
| "grad_norm": 44.99619674682617, | |
| "learning_rate": 1.9340637729161137e-05, | |
| "logits/chosen": -20.23802375793457, | |
| "logits/rejected": -22.14116668701172, | |
| "logps/chosen": -505.2397155761719, | |
| "logps/rejected": -458.6205139160156, | |
| "loss": 1.4139, | |
| "rewards/accuracies": 0.5699999928474426, | |
| "rewards/chosen": 1.7950440645217896, | |
| "rewards/margins": 1.0467220544815063, | |
| "rewards/rejected": 0.7483220100402832, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 0.35369134203447244, | |
| "grad_norm": 2.535125886993228e-08, | |
| "learning_rate": 1.9321881605361363e-05, | |
| "logits/chosen": -19.776222229003906, | |
| "logits/rejected": -22.802228927612305, | |
| "logps/chosen": -552.8232421875, | |
| "logps/rejected": -477.6277770996094, | |
| "loss": 1.0137, | |
| "rewards/accuracies": 0.6899999976158142, | |
| "rewards/chosen": 3.883512020111084, | |
| "rewards/margins": 2.6781344413757324, | |
| "rewards/rejected": 1.2053773403167725, | |
| "step": 7100 | |
| }, | |
| { | |
| "epoch": 0.3586729102321411, | |
| "grad_norm": 104.68194580078125, | |
| "learning_rate": 1.9302871798194005e-05, | |
| "logits/chosen": -21.631492614746094, | |
| "logits/rejected": -25.338726043701172, | |
| "logps/chosen": -458.9781494140625, | |
| "logps/rejected": -421.4054260253906, | |
| "loss": 1.5316, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": 1.5002456903457642, | |
| "rewards/margins": 1.058099389076233, | |
| "rewards/rejected": 0.4421464204788208, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 0.3636544784298097, | |
| "grad_norm": 15.395011901855469, | |
| "learning_rate": 1.9283608824987236e-05, | |
| "logits/chosen": -21.326448440551758, | |
| "logits/rejected": -27.821928024291992, | |
| "logps/chosen": -519.0068969726562, | |
| "logps/rejected": -431.6269226074219, | |
| "loss": 1.4097, | |
| "rewards/accuracies": 0.6600000262260437, | |
| "rewards/chosen": 2.3849658966064453, | |
| "rewards/margins": 2.2866411209106445, | |
| "rewards/rejected": 0.09832416474819183, | |
| "step": 7300 | |
| }, | |
| { | |
| "epoch": 0.36863604662747834, | |
| "grad_norm": 5.503931999206543, | |
| "learning_rate": 1.9264093209958822e-05, | |
| "logits/chosen": -22.2663631439209, | |
| "logits/rejected": -30.09918212890625, | |
| "logps/chosen": -519.89013671875, | |
| "logps/rejected": -434.9560241699219, | |
| "loss": 0.6146, | |
| "rewards/accuracies": 0.7400000095367432, | |
| "rewards/chosen": 1.8443881273269653, | |
| "rewards/margins": 3.3645260334014893, | |
| "rewards/rejected": -1.5201376676559448, | |
| "step": 7400 | |
| }, | |
| { | |
| "epoch": 0.37361761482514694, | |
| "grad_norm": 0.397699773311615, | |
| "learning_rate": 1.9244325484201844e-05, | |
| "logits/chosen": -21.55438995361328, | |
| "logits/rejected": -26.417490005493164, | |
| "logps/chosen": -578.6663208007812, | |
| "logps/rejected": -494.0219421386719, | |
| "loss": 1.32, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": 2.8062126636505127, | |
| "rewards/margins": 1.9167245626449585, | |
| "rewards/rejected": 0.8894882798194885, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 0.3785991830228156, | |
| "grad_norm": 1.4006325006484985, | |
| "learning_rate": 1.9224306185670284e-05, | |
| "logits/chosen": -21.26766586303711, | |
| "logits/rejected": -23.203746795654297, | |
| "logps/chosen": -524.5022583007812, | |
| "logps/rejected": -450.9858093261719, | |
| "loss": 1.2222, | |
| "rewards/accuracies": 0.5699999928474426, | |
| "rewards/chosen": 2.8659799098968506, | |
| "rewards/margins": 1.5051510334014893, | |
| "rewards/rejected": 1.3608287572860718, | |
| "step": 7600 | |
| }, | |
| { | |
| "epoch": 0.3835807512204842, | |
| "grad_norm": 77.18798828125, | |
| "learning_rate": 1.9204035859164346e-05, | |
| "logits/chosen": -20.718629837036133, | |
| "logits/rejected": -24.271589279174805, | |
| "logps/chosen": -482.0295715332031, | |
| "logps/rejected": -402.9772644042969, | |
| "loss": 1.4243, | |
| "rewards/accuracies": 0.5799999833106995, | |
| "rewards/chosen": 2.1575722694396973, | |
| "rewards/margins": 1.1561360359191895, | |
| "rewards/rejected": 1.0014359951019287, | |
| "step": 7700 | |
| }, | |
| { | |
| "epoch": 0.38856231941815284, | |
| "grad_norm": 7.084451675415039, | |
| "learning_rate": 1.9183515056315664e-05, | |
| "logits/chosen": -20.11510467529297, | |
| "logits/rejected": -22.786483764648438, | |
| "logps/chosen": -521.9429931640625, | |
| "logps/rejected": -447.221923828125, | |
| "loss": 1.2535, | |
| "rewards/accuracies": 0.6800000071525574, | |
| "rewards/chosen": 3.086232900619507, | |
| "rewards/margins": 1.8227348327636719, | |
| "rewards/rejected": 1.2634981870651245, | |
| "step": 7800 | |
| }, | |
| { | |
| "epoch": 0.3935438876158215, | |
| "grad_norm": 26.435029983520508, | |
| "learning_rate": 1.9162744335572254e-05, | |
| "logits/chosen": -20.078449249267578, | |
| "logits/rejected": -21.64859390258789, | |
| "logps/chosen": -495.9106750488281, | |
| "logps/rejected": -439.6401672363281, | |
| "loss": 1.3286, | |
| "rewards/accuracies": 0.5600000023841858, | |
| "rewards/chosen": 2.769630193710327, | |
| "rewards/margins": 0.8031193017959595, | |
| "rewards/rejected": 1.9665107727050781, | |
| "step": 7900 | |
| }, | |
| { | |
| "epoch": 0.3985254558134901, | |
| "grad_norm": 55.87978744506836, | |
| "learning_rate": 1.9141724262183347e-05, | |
| "logits/chosen": -19.4700927734375, | |
| "logits/rejected": -24.67208480834961, | |
| "logps/chosen": -476.494873046875, | |
| "logps/rejected": -370.9423828125, | |
| "loss": 1.1024, | |
| "rewards/accuracies": 0.6399999856948853, | |
| "rewards/chosen": 3.3464269638061523, | |
| "rewards/margins": 1.9073596000671387, | |
| "rewards/rejected": 1.4390674829483032, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 0.40350702401115873, | |
| "grad_norm": 72.6644515991211, | |
| "learning_rate": 1.9120455408183996e-05, | |
| "logits/chosen": -19.84633445739746, | |
| "logits/rejected": -23.482101440429688, | |
| "logps/chosen": -469.52984619140625, | |
| "logps/rejected": -371.6656494140625, | |
| "loss": 1.0347, | |
| "rewards/accuracies": 0.6299999952316284, | |
| "rewards/chosen": 3.2709081172943115, | |
| "rewards/margins": 1.9965094327926636, | |
| "rewards/rejected": 1.2743984460830688, | |
| "step": 8100 | |
| }, | |
| { | |
| "epoch": 0.40848859220882733, | |
| "grad_norm": 7.036466121673584, | |
| "learning_rate": 1.9098938352379497e-05, | |
| "logits/chosen": -19.558134078979492, | |
| "logits/rejected": -22.82000732421875, | |
| "logps/chosen": -516.8818359375, | |
| "logps/rejected": -426.9209289550781, | |
| "loss": 1.3704, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": 2.8734841346740723, | |
| "rewards/margins": 1.5538628101348877, | |
| "rewards/rejected": 1.3196213245391846, | |
| "step": 8200 | |
| }, | |
| { | |
| "epoch": 0.413470160406496, | |
| "grad_norm": 19.03754997253418, | |
| "learning_rate": 1.9077173680329667e-05, | |
| "logits/chosen": -19.861852645874023, | |
| "logits/rejected": -20.495954513549805, | |
| "logps/chosen": -434.7484436035156, | |
| "logps/rejected": -418.93182373046875, | |
| "loss": 1.4406, | |
| "rewards/accuracies": 0.5400000214576721, | |
| "rewards/chosen": 3.662661552429199, | |
| "rewards/margins": 1.0773922204971313, | |
| "rewards/rejected": 2.5852692127227783, | |
| "step": 8300 | |
| }, | |
| { | |
| "epoch": 0.4184517286041646, | |
| "grad_norm": 0.007115426007658243, | |
| "learning_rate": 1.9055161984332865e-05, | |
| "logits/chosen": -19.320505142211914, | |
| "logits/rejected": -20.781309127807617, | |
| "logps/chosen": -505.4100036621094, | |
| "logps/rejected": -399.3970031738281, | |
| "loss": 1.2344, | |
| "rewards/accuracies": 0.6399999856948853, | |
| "rewards/chosen": 2.867283582687378, | |
| "rewards/margins": 1.5176202058792114, | |
| "rewards/rejected": 1.3496633768081665, | |
| "step": 8400 | |
| }, | |
| { | |
| "epoch": 0.4234332968018332, | |
| "grad_norm": 27.222930908203125, | |
| "learning_rate": 1.9032903863409916e-05, | |
| "logits/chosen": -19.227426528930664, | |
| "logits/rejected": -20.481821060180664, | |
| "logps/chosen": -504.85076904296875, | |
| "logps/rejected": -399.4018249511719, | |
| "loss": 1.1368, | |
| "rewards/accuracies": 0.6100000143051147, | |
| "rewards/chosen": 2.5695528984069824, | |
| "rewards/margins": 1.6272262334823608, | |
| "rewards/rejected": 0.9423269033432007, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 0.4284148649995018, | |
| "grad_norm": 118.92176055908203, | |
| "learning_rate": 1.901039992328779e-05, | |
| "logits/chosen": -19.390897750854492, | |
| "logits/rejected": -20.656518936157227, | |
| "logps/chosen": -502.2049865722656, | |
| "logps/rejected": -406.15740966796875, | |
| "loss": 1.3008, | |
| "rewards/accuracies": 0.6399999856948853, | |
| "rewards/chosen": 2.6097073554992676, | |
| "rewards/margins": 1.4985759258270264, | |
| "rewards/rejected": 1.1111317873001099, | |
| "step": 8600 | |
| }, | |
| { | |
| "epoch": 0.43339643319717047, | |
| "grad_norm": 135.84136962890625, | |
| "learning_rate": 1.8987650776383116e-05, | |
| "logits/chosen": -19.83563232421875, | |
| "logits/rejected": -20.294017791748047, | |
| "logps/chosen": -496.2801513671875, | |
| "logps/rejected": -452.8775634765625, | |
| "loss": 1.689, | |
| "rewards/accuracies": 0.5699999928474426, | |
| "rewards/chosen": 2.978971481323242, | |
| "rewards/margins": 0.9833757281303406, | |
| "rewards/rejected": 1.9955962896347046, | |
| "step": 8700 | |
| }, | |
| { | |
| "epoch": 0.4383780013948391, | |
| "grad_norm": 0.03969337046146393, | |
| "learning_rate": 1.896465704178551e-05, | |
| "logits/chosen": -19.218610763549805, | |
| "logits/rejected": -20.0975284576416, | |
| "logps/chosen": -479.8434753417969, | |
| "logps/rejected": -410.504150390625, | |
| "loss": 1.2651, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": 2.871466875076294, | |
| "rewards/margins": 1.330002784729004, | |
| "rewards/rejected": 1.5414642095565796, | |
| "step": 8800 | |
| }, | |
| { | |
| "epoch": 0.4433595695925077, | |
| "grad_norm": 2.9111685752868652, | |
| "learning_rate": 1.8941419345240763e-05, | |
| "logits/chosen": -19.58942222595215, | |
| "logits/rejected": -20.811443328857422, | |
| "logps/chosen": -435.19122314453125, | |
| "logps/rejected": -364.0976867675781, | |
| "loss": 1.164, | |
| "rewards/accuracies": 0.5899999737739563, | |
| "rewards/chosen": 3.163036823272705, | |
| "rewards/margins": 1.286778450012207, | |
| "rewards/rejected": 1.876258373260498, | |
| "step": 8900 | |
| }, | |
| { | |
| "epoch": 0.44834113779017637, | |
| "grad_norm": 92.33987426757812, | |
| "learning_rate": 1.891793831913376e-05, | |
| "logits/chosen": -19.12569808959961, | |
| "logits/rejected": -20.312471389770508, | |
| "logps/chosen": -539.9608764648438, | |
| "logps/rejected": -456.8437805175781, | |
| "loss": 1.0481, | |
| "rewards/accuracies": 0.6700000166893005, | |
| "rewards/chosen": 2.520881175994873, | |
| "rewards/margins": 1.5386346578598022, | |
| "rewards/rejected": 0.982246458530426, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 0.45332270598784496, | |
| "grad_norm": 24.77559471130371, | |
| "learning_rate": 1.8894214602471307e-05, | |
| "logits/chosen": -19.473718643188477, | |
| "logits/rejected": -21.318897247314453, | |
| "logps/chosen": -499.727783203125, | |
| "logps/rejected": -432.5865478515625, | |
| "loss": 1.3744, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": 2.8033368587493896, | |
| "rewards/margins": 1.5860238075256348, | |
| "rewards/rejected": 1.2173125743865967, | |
| "step": 9100 | |
| }, | |
| { | |
| "epoch": 0.4583042741855136, | |
| "grad_norm": 2.553715467453003, | |
| "learning_rate": 1.887024884086473e-05, | |
| "logits/chosen": -19.989469528198242, | |
| "logits/rejected": -21.36966323852539, | |
| "logps/chosen": -485.37139892578125, | |
| "logps/rejected": -414.8341369628906, | |
| "loss": 1.2352, | |
| "rewards/accuracies": 0.6299999952316284, | |
| "rewards/chosen": 2.2446627616882324, | |
| "rewards/margins": 2.1422224044799805, | |
| "rewards/rejected": 0.10244012624025345, | |
| "step": 9200 | |
| }, | |
| { | |
| "epoch": 0.4632858423831822, | |
| "grad_norm": 9.945940017700195, | |
| "learning_rate": 1.88460416865123e-05, | |
| "logits/chosen": -19.838525772094727, | |
| "logits/rejected": -21.396879196166992, | |
| "logps/chosen": -510.5537109375, | |
| "logps/rejected": -417.2762451171875, | |
| "loss": 1.1751, | |
| "rewards/accuracies": 0.6800000071525574, | |
| "rewards/chosen": 1.9794914722442627, | |
| "rewards/margins": 1.851491093635559, | |
| "rewards/rejected": 0.12800025939941406, | |
| "step": 9300 | |
| }, | |
| { | |
| "epoch": 0.46826741058085086, | |
| "grad_norm": 15.31814193725586, | |
| "learning_rate": 1.88215937981815e-05, | |
| "logits/chosen": -19.403379440307617, | |
| "logits/rejected": -20.53765869140625, | |
| "logps/chosen": -476.90802001953125, | |
| "logps/rejected": -390.69744873046875, | |
| "loss": 1.7465, | |
| "rewards/accuracies": 0.5099999904632568, | |
| "rewards/chosen": 3.1520519256591797, | |
| "rewards/margins": 0.6633343696594238, | |
| "rewards/rejected": 2.488717555999756, | |
| "step": 9400 | |
| }, | |
| { | |
| "epoch": 0.47324897877851946, | |
| "grad_norm": 2.4341812133789062, | |
| "learning_rate": 1.879690584119108e-05, | |
| "logits/chosen": -18.863977432250977, | |
| "logits/rejected": -19.929393768310547, | |
| "logps/chosen": -451.2972717285156, | |
| "logps/rejected": -356.81878662109375, | |
| "loss": 1.0762, | |
| "rewards/accuracies": 0.6399999856948853, | |
| "rewards/chosen": 3.5310535430908203, | |
| "rewards/margins": 0.9716143012046814, | |
| "rewards/rejected": 2.5594394207000732, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 0.4782305469761881, | |
| "grad_norm": 0.5188534259796143, | |
| "learning_rate": 1.8771978487392965e-05, | |
| "logits/chosen": -19.067102432250977, | |
| "logits/rejected": -19.693904876708984, | |
| "logps/chosen": -436.10125732421875, | |
| "logps/rejected": -371.744140625, | |
| "loss": 1.2393, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": 3.686614990234375, | |
| "rewards/margins": 1.6963415145874023, | |
| "rewards/rejected": 1.9902732372283936, | |
| "step": 9600 | |
| }, | |
| { | |
| "epoch": 0.4832121151738567, | |
| "grad_norm": 0.9292926788330078, | |
| "learning_rate": 1.874681241515396e-05, | |
| "logits/chosen": -18.863676071166992, | |
| "logits/rejected": -18.958852767944336, | |
| "logps/chosen": -462.2861328125, | |
| "logps/rejected": -413.9620361328125, | |
| "loss": 1.2257, | |
| "rewards/accuracies": 0.5899999737739563, | |
| "rewards/chosen": 3.4554378986358643, | |
| "rewards/margins": 0.9302346110343933, | |
| "rewards/rejected": 2.525202989578247, | |
| "step": 9700 | |
| }, | |
| { | |
| "epoch": 0.48819368337152536, | |
| "grad_norm": 35.07600402832031, | |
| "learning_rate": 1.8721408309337295e-05, | |
| "logits/chosen": -18.920787811279297, | |
| "logits/rejected": -19.5614070892334, | |
| "logps/chosen": -475.8876647949219, | |
| "logps/rejected": -409.3224182128906, | |
| "loss": 1.2497, | |
| "rewards/accuracies": 0.6399999856948853, | |
| "rewards/chosen": 3.723018169403076, | |
| "rewards/margins": 1.3657230138778687, | |
| "rewards/rejected": 2.357295036315918, | |
| "step": 9800 | |
| }, | |
| { | |
| "epoch": 0.493175251569194, | |
| "grad_norm": 0.003431697143241763, | |
| "learning_rate": 1.8695766861283987e-05, | |
| "logits/chosen": -18.851102828979492, | |
| "logits/rejected": -19.668804168701172, | |
| "logps/chosen": -505.24945068359375, | |
| "logps/rejected": -430.45428466796875, | |
| "loss": 1.1504, | |
| "rewards/accuracies": 0.6299999952316284, | |
| "rewards/chosen": 3.6089422702789307, | |
| "rewards/margins": 1.7381848096847534, | |
| "rewards/rejected": 1.8707573413848877, | |
| "step": 9900 | |
| }, | |
| { | |
| "epoch": 0.4981568197668626, | |
| "grad_norm": 65.25814819335938, | |
| "learning_rate": 1.8669888768794024e-05, | |
| "logits/chosen": -18.943655014038086, | |
| "logits/rejected": -19.943601608276367, | |
| "logps/chosen": -456.38531494140625, | |
| "logps/rejected": -394.1759033203125, | |
| "loss": 1.1225, | |
| "rewards/accuracies": 0.6200000047683716, | |
| "rewards/chosen": 3.0651655197143555, | |
| "rewards/margins": 1.2026199102401733, | |
| "rewards/rejected": 1.8625457286834717, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 0.4981568197668626, | |
| "eval_logits/chosen": -20.5496826171875, | |
| "eval_logits/rejected": -21.546123504638672, | |
| "eval_logps/chosen": -477.2846984863281, | |
| "eval_logps/rejected": -404.45428466796875, | |
| "eval_loss": 1.1951801776885986, | |
| "eval_rewards/accuracies": 0.6335078477859497, | |
| "eval_rewards/chosen": 4.191972732543945, | |
| "eval_rewards/margins": 1.5964468717575073, | |
| "eval_rewards/rejected": 2.5955255031585693, | |
| "eval_runtime": 472.8478, | |
| "eval_samples_per_second": 3.219, | |
| "eval_steps_per_second": 0.404, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 0.5031383879645313, | |
| "grad_norm": 94.86180114746094, | |
| "learning_rate": 1.8643774736107384e-05, | |
| "logits/chosen": -18.719505310058594, | |
| "logits/rejected": -19.4000244140625, | |
| "logps/chosen": -508.5538330078125, | |
| "logps/rejected": -445.2476806640625, | |
| "loss": 1.3446, | |
| "rewards/accuracies": 0.5799999833106995, | |
| "rewards/chosen": 3.225598096847534, | |
| "rewards/margins": 1.1447488069534302, | |
| "rewards/rejected": 2.0808494091033936, | |
| "step": 10100 | |
| }, | |
| { | |
| "epoch": 0.5081199561621998, | |
| "grad_norm": 48.567291259765625, | |
| "learning_rate": 1.8617425473884855e-05, | |
| "logits/chosen": -18.674545288085938, | |
| "logits/rejected": -19.417861938476562, | |
| "logps/chosen": -519.6810302734375, | |
| "logps/rejected": -433.57940673828125, | |
| "loss": 1.3277, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": 2.415245771408081, | |
| "rewards/margins": 1.2748632431030273, | |
| "rewards/rejected": 1.1403824090957642, | |
| "step": 10200 | |
| }, | |
| { | |
| "epoch": 0.5131015243598684, | |
| "grad_norm": 6.8724141120910645, | |
| "learning_rate": 1.859084169918871e-05, | |
| "logits/chosen": -18.91655731201172, | |
| "logits/rejected": -19.582307815551758, | |
| "logps/chosen": -490.5546569824219, | |
| "logps/rejected": -427.4134521484375, | |
| "loss": 1.5115, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": 2.807913303375244, | |
| "rewards/margins": 1.6051901578903198, | |
| "rewards/rejected": 1.2027232646942139, | |
| "step": 10300 | |
| }, | |
| { | |
| "epoch": 0.5180830925575372, | |
| "grad_norm": 11.200637817382812, | |
| "learning_rate": 1.8564024135463173e-05, | |
| "logits/chosen": -18.65709114074707, | |
| "logits/rejected": -18.75111198425293, | |
| "logps/chosen": -456.7597961425781, | |
| "logps/rejected": -418.4756774902344, | |
| "loss": 1.4256, | |
| "rewards/accuracies": 0.5699999928474426, | |
| "rewards/chosen": 1.6474815607070923, | |
| "rewards/margins": 0.9819788336753845, | |
| "rewards/rejected": 0.6655027866363525, | |
| "step": 10400 | |
| }, | |
| { | |
| "epoch": 0.5230646607552057, | |
| "grad_norm": 14.02713394165039, | |
| "learning_rate": 1.8536973512514762e-05, | |
| "logits/chosen": -18.270898818969727, | |
| "logits/rejected": -18.737123489379883, | |
| "logps/chosen": -495.671875, | |
| "logps/rejected": -404.7850646972656, | |
| "loss": 1.1738, | |
| "rewards/accuracies": 0.6700000166893005, | |
| "rewards/chosen": 3.6311376094818115, | |
| "rewards/margins": 1.3220287561416626, | |
| "rewards/rejected": 2.3091087341308594, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 0.5280462289528743, | |
| "grad_norm": 88.36447143554688, | |
| "learning_rate": 1.85096905664924e-05, | |
| "logits/chosen": -18.358213424682617, | |
| "logits/rejected": -18.569581985473633, | |
| "logps/chosen": -460.0679931640625, | |
| "logps/rejected": -420.5664978027344, | |
| "loss": 1.4962, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": 1.870842456817627, | |
| "rewards/margins": 1.0018635988235474, | |
| "rewards/rejected": 0.8689790368080139, | |
| "step": 10600 | |
| }, | |
| { | |
| "epoch": 0.5330277971505429, | |
| "grad_norm": 2.6545143127441406, | |
| "learning_rate": 1.848217603986739e-05, | |
| "logits/chosen": -18.19515037536621, | |
| "logits/rejected": -18.503700256347656, | |
| "logps/chosen": -482.6021423339844, | |
| "logps/rejected": -439.8750915527344, | |
| "loss": 1.3315, | |
| "rewards/accuracies": 0.5899999737739563, | |
| "rewards/chosen": 2.491628408432007, | |
| "rewards/margins": 0.7911645770072937, | |
| "rewards/rejected": 1.700463891029358, | |
| "step": 10700 | |
| }, | |
| { | |
| "epoch": 0.5380093653482116, | |
| "grad_norm": 54.594303131103516, | |
| "learning_rate": 1.845443068141322e-05, | |
| "logits/chosen": -18.29205894470215, | |
| "logits/rejected": -19.060501098632812, | |
| "logps/chosen": -499.4178466796875, | |
| "logps/rejected": -410.4550476074219, | |
| "loss": 1.3476, | |
| "rewards/accuracies": 0.6200000047683716, | |
| "rewards/chosen": 3.079913377761841, | |
| "rewards/margins": 1.4285519123077393, | |
| "rewards/rejected": 1.6513612270355225, | |
| "step": 10800 | |
| }, | |
| { | |
| "epoch": 0.5429909335458802, | |
| "grad_norm": 7.620671272277832, | |
| "learning_rate": 1.8426455246185177e-05, | |
| "logits/chosen": -18.482242584228516, | |
| "logits/rejected": -19.06051254272461, | |
| "logps/chosen": -488.9962463378906, | |
| "logps/rejected": -414.5852355957031, | |
| "loss": 1.118, | |
| "rewards/accuracies": 0.7099999785423279, | |
| "rewards/chosen": 3.6191320419311523, | |
| "rewards/margins": 1.9326629638671875, | |
| "rewards/rejected": 1.6864690780639648, | |
| "step": 10900 | |
| }, | |
| { | |
| "epoch": 0.5479725017435488, | |
| "grad_norm": 44.31614685058594, | |
| "learning_rate": 1.8398250495499796e-05, | |
| "logits/chosen": -18.507394790649414, | |
| "logits/rejected": -19.16678810119629, | |
| "logps/chosen": -496.0931701660156, | |
| "logps/rejected": -424.0758056640625, | |
| "loss": 1.3294, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": 3.764357805252075, | |
| "rewards/margins": 1.4526183605194092, | |
| "rewards/rejected": 2.311739444732666, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 0.5529540699412175, | |
| "grad_norm": 113.41361999511719, | |
| "learning_rate": 1.8369817196914145e-05, | |
| "logits/chosen": -18.77853775024414, | |
| "logits/rejected": -19.550212860107422, | |
| "logps/chosen": -458.4302978515625, | |
| "logps/rejected": -376.8597717285156, | |
| "loss": 1.2723, | |
| "rewards/accuracies": 0.6200000047683716, | |
| "rewards/chosen": 2.409397602081299, | |
| "rewards/margins": 1.0338749885559082, | |
| "rewards/rejected": 1.375522255897522, | |
| "step": 11100 | |
| }, | |
| { | |
| "epoch": 0.5579356381388861, | |
| "grad_norm": 39.32530212402344, | |
| "learning_rate": 1.8341156124204943e-05, | |
| "logits/chosen": -18.80110740661621, | |
| "logits/rejected": -19.126850128173828, | |
| "logps/chosen": -444.9359436035156, | |
| "logps/rejected": -403.28546142578125, | |
| "loss": 1.2621, | |
| "rewards/accuracies": 0.6299999952316284, | |
| "rewards/chosen": 3.3543567657470703, | |
| "rewards/margins": 1.4829553365707397, | |
| "rewards/rejected": 1.8714015483856201, | |
| "step": 11200 | |
| }, | |
| { | |
| "epoch": 0.5629172063365547, | |
| "grad_norm": 0.01231900043785572, | |
| "learning_rate": 1.8312268057347488e-05, | |
| "logits/chosen": -19.005640029907227, | |
| "logits/rejected": -19.596464157104492, | |
| "logps/chosen": -466.5137634277344, | |
| "logps/rejected": -408.3565673828125, | |
| "loss": 1.2726, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": 2.5804996490478516, | |
| "rewards/margins": 1.2464163303375244, | |
| "rewards/rejected": 1.3340831995010376, | |
| "step": 11300 | |
| }, | |
| { | |
| "epoch": 0.5678987745342233, | |
| "grad_norm": 78.91645050048828, | |
| "learning_rate": 1.8283153782494457e-05, | |
| "logits/chosen": -19.66242218017578, | |
| "logits/rejected": -20.03885269165039, | |
| "logps/chosen": -495.5252380371094, | |
| "logps/rejected": -428.739501953125, | |
| "loss": 1.1003, | |
| "rewards/accuracies": 0.6299999952316284, | |
| "rewards/chosen": 0.8916977047920227, | |
| "rewards/margins": 2.0576887130737305, | |
| "rewards/rejected": -1.165990948677063, | |
| "step": 11400 | |
| }, | |
| { | |
| "epoch": 0.572880342731892, | |
| "grad_norm": 2.7297961711883545, | |
| "learning_rate": 1.8253814091954476e-05, | |
| "logits/chosen": -19.751190185546875, | |
| "logits/rejected": -20.76055335998535, | |
| "logps/chosen": -470.715576171875, | |
| "logps/rejected": -401.6990661621094, | |
| "loss": 1.1496, | |
| "rewards/accuracies": 0.6100000143051147, | |
| "rewards/chosen": -0.058783989399671555, | |
| "rewards/margins": 1.678650975227356, | |
| "rewards/rejected": -1.7374348640441895, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 0.5778619109295606, | |
| "grad_norm": 11.752656936645508, | |
| "learning_rate": 1.8224249784170595e-05, | |
| "logits/chosen": -19.580923080444336, | |
| "logits/rejected": -20.732593536376953, | |
| "logps/chosen": -517.3013916015625, | |
| "logps/rejected": -441.9253845214844, | |
| "loss": 1.2111, | |
| "rewards/accuracies": 0.6700000166893005, | |
| "rewards/chosen": -0.6827618479728699, | |
| "rewards/margins": 1.910689115524292, | |
| "rewards/rejected": -2.5934510231018066, | |
| "step": 11600 | |
| }, | |
| { | |
| "epoch": 0.5828434791272292, | |
| "grad_norm": 96.53925323486328, | |
| "learning_rate": 1.8194461663698524e-05, | |
| "logits/chosen": -19.67738914489746, | |
| "logits/rejected": -21.431556701660156, | |
| "logps/chosen": -518.2506103515625, | |
| "logps/rejected": -389.5613708496094, | |
| "loss": 1.3043, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": 0.12440891563892365, | |
| "rewards/margins": 2.5978763103485107, | |
| "rewards/rejected": -2.4734673500061035, | |
| "step": 11700 | |
| }, | |
| { | |
| "epoch": 0.5878250473248979, | |
| "grad_norm": 62.21799850463867, | |
| "learning_rate": 1.8164450541184768e-05, | |
| "logits/chosen": -19.013898849487305, | |
| "logits/rejected": -19.318574905395508, | |
| "logps/chosen": -564.2166137695312, | |
| "logps/rejected": -528.39111328125, | |
| "loss": 1.3621, | |
| "rewards/accuracies": 0.6299999952316284, | |
| "rewards/chosen": 0.12132181972265244, | |
| "rewards/margins": 1.5959105491638184, | |
| "rewards/rejected": -1.4745885133743286, | |
| "step": 11800 | |
| }, | |
| { | |
| "epoch": 0.5928066155225665, | |
| "grad_norm": 0.12062743306159973, | |
| "learning_rate": 1.8134217233344556e-05, | |
| "logits/chosen": -19.182098388671875, | |
| "logits/rejected": -19.83804702758789, | |
| "logps/chosen": -525.335693359375, | |
| "logps/rejected": -455.0185852050781, | |
| "loss": 1.3079, | |
| "rewards/accuracies": 0.6299999952316284, | |
| "rewards/chosen": -0.4299677908420563, | |
| "rewards/margins": 1.420630931854248, | |
| "rewards/rejected": -1.8505988121032715, | |
| "step": 11900 | |
| }, | |
| { | |
| "epoch": 0.5977881837202351, | |
| "grad_norm": 4.478858470916748, | |
| "learning_rate": 1.81037625629396e-05, | |
| "logits/chosen": -18.84477996826172, | |
| "logits/rejected": -19.84359359741211, | |
| "logps/chosen": -519.6325073242188, | |
| "logps/rejected": -437.3699951171875, | |
| "loss": 1.2444, | |
| "rewards/accuracies": 0.6800000071525574, | |
| "rewards/chosen": 1.3549919128417969, | |
| "rewards/margins": 2.0743324756622314, | |
| "rewards/rejected": -0.7193406820297241, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 0.6027697519179037, | |
| "grad_norm": 43.38969039916992, | |
| "learning_rate": 1.8073087358755735e-05, | |
| "logits/chosen": -18.777620315551758, | |
| "logits/rejected": -19.37495231628418, | |
| "logps/chosen": -485.4015197753906, | |
| "logps/rejected": -413.2806396484375, | |
| "loss": 1.2657, | |
| "rewards/accuracies": 0.6100000143051147, | |
| "rewards/chosen": 2.3500404357910156, | |
| "rewards/margins": 2.2481086254119873, | |
| "rewards/rejected": 0.10193166881799698, | |
| "step": 12100 | |
| }, | |
| { | |
| "epoch": 0.6077513201155724, | |
| "grad_norm": 29.47319984436035, | |
| "learning_rate": 1.804219245558033e-05, | |
| "logits/chosen": -18.747379302978516, | |
| "logits/rejected": -19.17096519470215, | |
| "logps/chosen": -476.1571044921875, | |
| "logps/rejected": -414.704345703125, | |
| "loss": 1.3264, | |
| "rewards/accuracies": 0.5799999833106995, | |
| "rewards/chosen": 1.9210524559020996, | |
| "rewards/margins": 0.9324368238449097, | |
| "rewards/rejected": 0.9886155724525452, | |
| "step": 12200 | |
| }, | |
| { | |
| "epoch": 0.612732888313241, | |
| "grad_norm": 57.94328689575195, | |
| "learning_rate": 1.8011078694179602e-05, | |
| "logits/chosen": -18.417835235595703, | |
| "logits/rejected": -18.728105545043945, | |
| "logps/chosen": -466.6083068847656, | |
| "logps/rejected": -417.28936767578125, | |
| "loss": 1.3787, | |
| "rewards/accuracies": 0.5899999737739563, | |
| "rewards/chosen": 2.9616811275482178, | |
| "rewards/margins": 0.9320456981658936, | |
| "rewards/rejected": 2.0296356678009033, | |
| "step": 12300 | |
| }, | |
| { | |
| "epoch": 0.6177144565109096, | |
| "grad_norm": 0.6535269021987915, | |
| "learning_rate": 1.7979746921275713e-05, | |
| "logits/chosen": -18.470064163208008, | |
| "logits/rejected": -19.071678161621094, | |
| "logps/chosen": -499.7461242675781, | |
| "logps/rejected": -414.9991455078125, | |
| "loss": 1.0886, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": 2.9048681259155273, | |
| "rewards/margins": 1.7158997058868408, | |
| "rewards/rejected": 1.188968300819397, | |
| "step": 12400 | |
| }, | |
| { | |
| "epoch": 0.6226960247085782, | |
| "grad_norm": 47.690162658691406, | |
| "learning_rate": 1.794819798952374e-05, | |
| "logits/chosen": -18.49652862548828, | |
| "logits/rejected": -18.98128318786621, | |
| "logps/chosen": -571.6408081054688, | |
| "logps/rejected": -473.7767333984375, | |
| "loss": 1.2108, | |
| "rewards/accuracies": 0.7099999785423279, | |
| "rewards/chosen": 2.7018349170684814, | |
| "rewards/margins": 2.354581832885742, | |
| "rewards/rejected": 0.34725311398506165, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 0.6276775929062469, | |
| "grad_norm": 69.55400085449219, | |
| "learning_rate": 1.7916432757488467e-05, | |
| "logits/chosen": -19.46697235107422, | |
| "logits/rejected": -20.09600830078125, | |
| "logps/chosen": -524.7301635742188, | |
| "logps/rejected": -433.07293701171875, | |
| "loss": 1.0183, | |
| "rewards/accuracies": 0.6700000166893005, | |
| "rewards/chosen": 1.3079893589019775, | |
| "rewards/margins": 3.2492458820343018, | |
| "rewards/rejected": -1.9412565231323242, | |
| "step": 12600 | |
| }, | |
| { | |
| "epoch": 0.6326591611039155, | |
| "grad_norm": 14.792251586914062, | |
| "learning_rate": 1.7884452089621012e-05, | |
| "logits/chosen": -19.28809928894043, | |
| "logits/rejected": -20.2492733001709, | |
| "logps/chosen": -578.1820068359375, | |
| "logps/rejected": -456.3279724121094, | |
| "loss": 1.1159, | |
| "rewards/accuracies": 0.6700000166893005, | |
| "rewards/chosen": 1.191977858543396, | |
| "rewards/margins": 2.5655128955841064, | |
| "rewards/rejected": -1.37353515625, | |
| "step": 12700 | |
| }, | |
| { | |
| "epoch": 0.6376407293015841, | |
| "grad_norm": 0.21572743356227875, | |
| "learning_rate": 1.7852256856235318e-05, | |
| "logits/chosen": -19.648353576660156, | |
| "logits/rejected": -20.134416580200195, | |
| "logps/chosen": -495.5775146484375, | |
| "logps/rejected": -438.1684265136719, | |
| "loss": 1.387, | |
| "rewards/accuracies": 0.6800000071525574, | |
| "rewards/chosen": -0.19859656691551208, | |
| "rewards/margins": 2.191715955734253, | |
| "rewards/rejected": -2.390312433242798, | |
| "step": 12800 | |
| }, | |
| { | |
| "epoch": 0.6426222974992528, | |
| "grad_norm": 102.35507202148438, | |
| "learning_rate": 1.7819847933484467e-05, | |
| "logits/chosen": -19.353174209594727, | |
| "logits/rejected": -20.048927307128906, | |
| "logps/chosen": -524.4760131835938, | |
| "logps/rejected": -446.4917907714844, | |
| "loss": 1.1967, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": 0.8889893293380737, | |
| "rewards/margins": 1.9711395502090454, | |
| "rewards/rejected": -1.0821502208709717, | |
| "step": 12900 | |
| }, | |
| { | |
| "epoch": 0.6476038656969214, | |
| "grad_norm": 0.06541766971349716, | |
| "learning_rate": 1.778722620333681e-05, | |
| "logits/chosen": -19.828271865844727, | |
| "logits/rejected": -20.194868087768555, | |
| "logps/chosen": -602.9769287109375, | |
| "logps/rejected": -530.201904296875, | |
| "loss": 1.6761, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -2.2178092002868652, | |
| "rewards/margins": 1.5633704662322998, | |
| "rewards/rejected": -3.781179904937744, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 0.65258543389459, | |
| "grad_norm": 58.162445068359375, | |
| "learning_rate": 1.775439255355201e-05, | |
| "logits/chosen": -19.331708908081055, | |
| "logits/rejected": -19.971097946166992, | |
| "logps/chosen": -570.8577880859375, | |
| "logps/rejected": -473.57196044921875, | |
| "loss": 1.1495, | |
| "rewards/accuracies": 0.6700000166893005, | |
| "rewards/chosen": 0.24211058020591736, | |
| "rewards/margins": 2.4019970893859863, | |
| "rewards/rejected": -2.159886598587036, | |
| "step": 13100 | |
| }, | |
| { | |
| "epoch": 0.6575670020922586, | |
| "grad_norm": 1.0752054452896118, | |
| "learning_rate": 1.772134787765684e-05, | |
| "logits/chosen": -19.27989959716797, | |
| "logits/rejected": -19.537317276000977, | |
| "logps/chosen": -541.393798828125, | |
| "logps/rejected": -501.2279968261719, | |
| "loss": 1.6051, | |
| "rewards/accuracies": 0.5799999833106995, | |
| "rewards/chosen": -1.2508366107940674, | |
| "rewards/margins": 0.7745574712753296, | |
| "rewards/rejected": -2.0253942012786865, | |
| "step": 13200 | |
| }, | |
| { | |
| "epoch": 0.6625485702899273, | |
| "grad_norm": 8.162809371948242, | |
| "learning_rate": 1.768809307492089e-05, | |
| "logits/chosen": -18.722593307495117, | |
| "logits/rejected": -19.011571884155273, | |
| "logps/chosen": -513.6095581054688, | |
| "logps/rejected": -469.2226257324219, | |
| "loss": 1.3372, | |
| "rewards/accuracies": 0.5400000214576721, | |
| "rewards/chosen": -0.9962272047996521, | |
| "rewards/margins": 0.9051995873451233, | |
| "rewards/rejected": -1.9014270305633545, | |
| "step": 13300 | |
| }, | |
| { | |
| "epoch": 0.6675301384875959, | |
| "grad_norm": 97.5845947265625, | |
| "learning_rate": 1.765462905033209e-05, | |
| "logits/chosen": -19.051023483276367, | |
| "logits/rejected": -19.420806884765625, | |
| "logps/chosen": -478.5913391113281, | |
| "logps/rejected": -437.26995849609375, | |
| "loss": 1.3999, | |
| "rewards/accuracies": 0.6299999952316284, | |
| "rewards/chosen": -1.0214941501617432, | |
| "rewards/margins": 0.9453433752059937, | |
| "rewards/rejected": -1.9668372869491577, | |
| "step": 13400 | |
| }, | |
| { | |
| "epoch": 0.6725117066852645, | |
| "grad_norm": 29.43077850341797, | |
| "learning_rate": 1.762095671457209e-05, | |
| "logits/chosen": -19.13440704345703, | |
| "logits/rejected": -19.925010681152344, | |
| "logps/chosen": -496.3144836425781, | |
| "logps/rejected": -413.5525817871094, | |
| "loss": 1.3997, | |
| "rewards/accuracies": 0.6100000143051147, | |
| "rewards/chosen": -0.08645965903997421, | |
| "rewards/margins": 1.8843421936035156, | |
| "rewards/rejected": -1.970801830291748, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 0.6774932748829331, | |
| "grad_norm": 0.7588065266609192, | |
| "learning_rate": 1.7587076983991457e-05, | |
| "logits/chosen": -19.021947860717773, | |
| "logits/rejected": -19.49304962158203, | |
| "logps/chosen": -520.0108032226562, | |
| "logps/rejected": -473.15020751953125, | |
| "loss": 1.9126, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": 1.3220689296722412, | |
| "rewards/margins": 0.6901782155036926, | |
| "rewards/rejected": 0.631890594959259, | |
| "step": 13600 | |
| }, | |
| { | |
| "epoch": 0.6824748430806018, | |
| "grad_norm": 110.71784973144531, | |
| "learning_rate": 1.755299078058475e-05, | |
| "logits/chosen": -19.794466018676758, | |
| "logits/rejected": -20.945425033569336, | |
| "logps/chosen": -485.5846862792969, | |
| "logps/rejected": -422.5166931152344, | |
| "loss": 1.0623, | |
| "rewards/accuracies": 0.6800000071525574, | |
| "rewards/chosen": -0.4341191351413727, | |
| "rewards/margins": 1.9454231262207031, | |
| "rewards/rejected": -2.379542589187622, | |
| "step": 13700 | |
| }, | |
| { | |
| "epoch": 0.6874564112782704, | |
| "grad_norm": 92.97657012939453, | |
| "learning_rate": 1.751869903196543e-05, | |
| "logits/chosen": -19.072101593017578, | |
| "logits/rejected": -20.458724975585938, | |
| "logps/chosen": -550.3908081054688, | |
| "logps/rejected": -454.7740173339844, | |
| "loss": 1.1903, | |
| "rewards/accuracies": 0.6299999952316284, | |
| "rewards/chosen": 0.9165298342704773, | |
| "rewards/margins": 1.906398892402649, | |
| "rewards/rejected": -0.9898689389228821, | |
| "step": 13800 | |
| }, | |
| { | |
| "epoch": 0.692437979475939, | |
| "grad_norm": 4.199160575866699, | |
| "learning_rate": 1.748420267134062e-05, | |
| "logits/chosen": -18.836036682128906, | |
| "logits/rejected": -19.922813415527344, | |
| "logps/chosen": -539.5211181640625, | |
| "logps/rejected": -479.9672546386719, | |
| "loss": 1.2681, | |
| "rewards/accuracies": 0.6100000143051147, | |
| "rewards/chosen": 1.260705828666687, | |
| "rewards/margins": 1.744788408279419, | |
| "rewards/rejected": -0.48408252000808716, | |
| "step": 13900 | |
| }, | |
| { | |
| "epoch": 0.6974195476736077, | |
| "grad_norm": 107.6252212524414, | |
| "learning_rate": 1.74495026374857e-05, | |
| "logits/chosen": -19.50172233581543, | |
| "logits/rejected": -20.288314819335938, | |
| "logps/chosen": -514.2687377929688, | |
| "logps/rejected": -449.1997375488281, | |
| "loss": 1.4695, | |
| "rewards/accuracies": 0.6299999952316284, | |
| "rewards/chosen": 0.515940248966217, | |
| "rewards/margins": 1.3783401250839233, | |
| "rewards/rejected": -0.8623998761177063, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 0.7024011158712763, | |
| "grad_norm": 5.941022872924805, | |
| "learning_rate": 1.7414599874718753e-05, | |
| "logits/chosen": -18.767423629760742, | |
| "logits/rejected": -19.68829917907715, | |
| "logps/chosen": -545.5341796875, | |
| "logps/rejected": -463.9657287597656, | |
| "loss": 1.1235, | |
| "rewards/accuracies": 0.6100000143051147, | |
| "rewards/chosen": 1.2197209596633911, | |
| "rewards/margins": 1.6720809936523438, | |
| "rewards/rejected": -0.4523601531982422, | |
| "step": 14100 | |
| }, | |
| { | |
| "epoch": 0.7073826840689449, | |
| "grad_norm": 26.461627960205078, | |
| "learning_rate": 1.737949533287489e-05, | |
| "logits/chosen": -18.46575355529785, | |
| "logits/rejected": -19.159351348876953, | |
| "logps/chosen": -517.9618530273438, | |
| "logps/rejected": -417.8922119140625, | |
| "loss": 1.14, | |
| "rewards/accuracies": 0.6800000071525574, | |
| "rewards/chosen": 1.9558275938034058, | |
| "rewards/margins": 2.069073438644409, | |
| "rewards/rejected": -0.11324585229158401, | |
| "step": 14200 | |
| }, | |
| { | |
| "epoch": 0.7123642522666135, | |
| "grad_norm": 1.1918169260025024, | |
| "learning_rate": 1.7344189967280383e-05, | |
| "logits/chosen": -19.000808715820312, | |
| "logits/rejected": -20.075515747070312, | |
| "logps/chosen": -474.513916015625, | |
| "logps/rejected": -400.20196533203125, | |
| "loss": 0.9665, | |
| "rewards/accuracies": 0.7099999785423279, | |
| "rewards/chosen": 1.894654393196106, | |
| "rewards/margins": 2.998011589050293, | |
| "rewards/rejected": -1.103356957435608, | |
| "step": 14300 | |
| }, | |
| { | |
| "epoch": 0.7173458204642822, | |
| "grad_norm": 60.40048599243164, | |
| "learning_rate": 1.7308684738726668e-05, | |
| "logits/chosen": -18.980615615844727, | |
| "logits/rejected": -20.142223358154297, | |
| "logps/chosen": -510.573974609375, | |
| "logps/rejected": -441.4659729003906, | |
| "loss": 1.266, | |
| "rewards/accuracies": 0.6100000143051147, | |
| "rewards/chosen": 0.2977685332298279, | |
| "rewards/margins": 1.5726754665374756, | |
| "rewards/rejected": -1.274907112121582, | |
| "step": 14400 | |
| }, | |
| { | |
| "epoch": 0.7223273886619508, | |
| "grad_norm": 0.5988157391548157, | |
| "learning_rate": 1.7272980613444206e-05, | |
| "logits/chosen": -18.941259384155273, | |
| "logits/rejected": -20.322023391723633, | |
| "logps/chosen": -531.8062744140625, | |
| "logps/rejected": -474.36785888671875, | |
| "loss": 1.2675, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.3074489235877991, | |
| "rewards/margins": 1.548563838005066, | |
| "rewards/rejected": -1.8560125827789307, | |
| "step": 14500 | |
| }, | |
| { | |
| "epoch": 0.7273089568596194, | |
| "grad_norm": 6.901514530181885, | |
| "learning_rate": 1.7237078563076178e-05, | |
| "logits/chosen": -19.498384475708008, | |
| "logits/rejected": -21.36153793334961, | |
| "logps/chosen": -511.9005126953125, | |
| "logps/rejected": -439.97003173828125, | |
| "loss": 1.2244, | |
| "rewards/accuracies": 0.6399999856948853, | |
| "rewards/chosen": -0.4899732172489166, | |
| "rewards/margins": 1.9954489469528198, | |
| "rewards/rejected": -2.485422372817993, | |
| "step": 14600 | |
| }, | |
| { | |
| "epoch": 0.7322905250572881, | |
| "grad_norm": 0.03188573196530342, | |
| "learning_rate": 1.7200979564652064e-05, | |
| "logits/chosen": -18.785024642944336, | |
| "logits/rejected": -21.15717124938965, | |
| "logps/chosen": -520.9817504882812, | |
| "logps/rejected": -426.6506652832031, | |
| "loss": 1.3959, | |
| "rewards/accuracies": 0.6200000047683716, | |
| "rewards/chosen": 1.1841310262680054, | |
| "rewards/margins": 1.9892054796218872, | |
| "rewards/rejected": -0.8050744533538818, | |
| "step": 14700 | |
| }, | |
| { | |
| "epoch": 0.7372720932549567, | |
| "grad_norm": 30.52501678466797, | |
| "learning_rate": 1.7164684600561018e-05, | |
| "logits/chosen": -18.466907501220703, | |
| "logits/rejected": -20.27123260498047, | |
| "logps/chosen": -531.89501953125, | |
| "logps/rejected": -429.2342834472656, | |
| "loss": 1.3491, | |
| "rewards/accuracies": 0.6200000047683716, | |
| "rewards/chosen": 2.848252773284912, | |
| "rewards/margins": 2.337670087814331, | |
| "rewards/rejected": 0.5105829834938049, | |
| "step": 14800 | |
| }, | |
| { | |
| "epoch": 0.7422536614526253, | |
| "grad_norm": 4.541143894195557, | |
| "learning_rate": 1.712819465852517e-05, | |
| "logits/chosen": -18.570043563842773, | |
| "logits/rejected": -20.75904083251953, | |
| "logps/chosen": -493.5054931640625, | |
| "logps/rejected": -380.9721984863281, | |
| "loss": 1.2685, | |
| "rewards/accuracies": 0.6399999856948853, | |
| "rewards/chosen": 1.8648478984832764, | |
| "rewards/margins": 2.2077205181121826, | |
| "rewards/rejected": -0.3428727388381958, | |
| "step": 14900 | |
| }, | |
| { | |
| "epoch": 0.7472352296502939, | |
| "grad_norm": 0.44224098324775696, | |
| "learning_rate": 1.7091510731572725e-05, | |
| "logits/chosen": -18.91974449157715, | |
| "logits/rejected": -20.655202865600586, | |
| "logps/chosen": -514.9723510742188, | |
| "logps/rejected": -452.38983154296875, | |
| "loss": 1.7715, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": 2.1622519493103027, | |
| "rewards/margins": 1.3429455757141113, | |
| "rewards/rejected": 0.8193062543869019, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 0.7472352296502939, | |
| "eval_logits/chosen": -22.020517349243164, | |
| "eval_logits/rejected": -23.86900520324707, | |
| "eval_logps/chosen": -484.0854187011719, | |
| "eval_logps/rejected": -414.28009033203125, | |
| "eval_loss": 1.2986581325531006, | |
| "eval_rewards/accuracies": 0.6367800831794739, | |
| "eval_rewards/chosen": 3.5119001865386963, | |
| "eval_rewards/margins": 1.8989582061767578, | |
| "eval_rewards/rejected": 1.6129425764083862, | |
| "eval_runtime": 472.8477, | |
| "eval_samples_per_second": 3.219, | |
| "eval_steps_per_second": 0.404, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 0.7522167978479626, | |
| "grad_norm": 10.687481880187988, | |
| "learning_rate": 1.7054633818010954e-05, | |
| "logits/chosen": -18.568933486938477, | |
| "logits/rejected": -20.1440372467041, | |
| "logps/chosen": -456.9844055175781, | |
| "logps/rejected": -408.1793212890625, | |
| "loss": 1.0277, | |
| "rewards/accuracies": 0.6800000071525574, | |
| "rewards/chosen": 3.049287796020508, | |
| "rewards/margins": 1.8809983730316162, | |
| "rewards/rejected": 1.168289065361023, | |
| "step": 15100 | |
| }, | |
| { | |
| "epoch": 0.7571983660456312, | |
| "grad_norm": 67.85148620605469, | |
| "learning_rate": 1.7017564921399e-05, | |
| "logits/chosen": -18.714679718017578, | |
| "logits/rejected": -21.288236618041992, | |
| "logps/chosen": -508.66015625, | |
| "logps/rejected": -424.9830322265625, | |
| "loss": 1.362, | |
| "rewards/accuracies": 0.6100000143051147, | |
| "rewards/chosen": 2.725537061691284, | |
| "rewards/margins": 1.661694049835205, | |
| "rewards/rejected": 1.0638428926467896, | |
| "step": 15200 | |
| }, | |
| { | |
| "epoch": 0.7621799342432998, | |
| "grad_norm": 44.31140899658203, | |
| "learning_rate": 1.698030505052061e-05, | |
| "logits/chosen": -18.928104400634766, | |
| "logits/rejected": -20.428186416625977, | |
| "logps/chosen": -470.02459716796875, | |
| "logps/rejected": -381.4432678222656, | |
| "loss": 1.504, | |
| "rewards/accuracies": 0.5600000023841858, | |
| "rewards/chosen": 2.468132734298706, | |
| "rewards/margins": 0.8910315036773682, | |
| "rewards/rejected": 1.577101469039917, | |
| "step": 15300 | |
| }, | |
| { | |
| "epoch": 0.7671615024409684, | |
| "grad_norm": 10.858190536499023, | |
| "learning_rate": 1.6942855219356634e-05, | |
| "logits/chosen": -18.520444869995117, | |
| "logits/rejected": -20.245197296142578, | |
| "logps/chosen": -491.31854248046875, | |
| "logps/rejected": -416.8392333984375, | |
| "loss": 1.0534, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": 3.1176271438598633, | |
| "rewards/margins": 1.8960695266723633, | |
| "rewards/rejected": 1.2215576171875, | |
| "step": 15400 | |
| }, | |
| { | |
| "epoch": 0.7721430706386371, | |
| "grad_norm": 0.026119831949472427, | |
| "learning_rate": 1.6905216447057467e-05, | |
| "logits/chosen": -19.040843963623047, | |
| "logits/rejected": -20.839155197143555, | |
| "logps/chosen": -495.08428955078125, | |
| "logps/rejected": -410.4775390625, | |
| "loss": 1.5746, | |
| "rewards/accuracies": 0.6200000047683716, | |
| "rewards/chosen": 2.2889349460601807, | |
| "rewards/margins": 1.6078674793243408, | |
| "rewards/rejected": 0.6810672283172607, | |
| "step": 15500 | |
| }, | |
| { | |
| "epoch": 0.7771246388363057, | |
| "grad_norm": 70.28289031982422, | |
| "learning_rate": 1.686738975791529e-05, | |
| "logits/chosen": -18.932214736938477, | |
| "logits/rejected": -20.42650032043457, | |
| "logps/chosen": -493.1498718261719, | |
| "logps/rejected": -429.498779296875, | |
| "loss": 1.4583, | |
| "rewards/accuracies": 0.5699999928474426, | |
| "rewards/chosen": 2.8957221508026123, | |
| "rewards/margins": 1.429540991783142, | |
| "rewards/rejected": 1.4661809206008911, | |
| "step": 15600 | |
| }, | |
| { | |
| "epoch": 0.7821062070339743, | |
| "grad_norm": 0.8262832164764404, | |
| "learning_rate": 1.6829376181336225e-05, | |
| "logits/chosen": -19.566686630249023, | |
| "logits/rejected": -21.254444122314453, | |
| "logps/chosen": -483.1775817871094, | |
| "logps/rejected": -451.6460266113281, | |
| "loss": 1.238, | |
| "rewards/accuracies": 0.5899999737739563, | |
| "rewards/chosen": 1.937461495399475, | |
| "rewards/margins": 1.0156903266906738, | |
| "rewards/rejected": 0.9217712879180908, | |
| "step": 15700 | |
| }, | |
| { | |
| "epoch": 0.787087775231643, | |
| "grad_norm": 10.062756538391113, | |
| "learning_rate": 1.6791176751812282e-05, | |
| "logits/chosen": -19.597667694091797, | |
| "logits/rejected": -21.418546676635742, | |
| "logps/chosen": -485.7425537109375, | |
| "logps/rejected": -417.196533203125, | |
| "loss": 1.1054, | |
| "rewards/accuracies": 0.6899999976158142, | |
| "rewards/chosen": 1.6439099311828613, | |
| "rewards/margins": 2.0514931678771973, | |
| "rewards/rejected": -0.40758341550827026, | |
| "step": 15800 | |
| }, | |
| { | |
| "epoch": 0.7920693434293116, | |
| "grad_norm": 11.171751022338867, | |
| "learning_rate": 1.675279250889324e-05, | |
| "logits/chosen": -19.189016342163086, | |
| "logits/rejected": -21.2204532623291, | |
| "logps/chosen": -523.8226318359375, | |
| "logps/rejected": -432.4778137207031, | |
| "loss": 1.2853, | |
| "rewards/accuracies": 0.6800000071525574, | |
| "rewards/chosen": 2.265528917312622, | |
| "rewards/margins": 2.169790744781494, | |
| "rewards/rejected": 0.0957380086183548, | |
| "step": 15900 | |
| }, | |
| { | |
| "epoch": 0.7970509116269802, | |
| "grad_norm": 5.786453723907471, | |
| "learning_rate": 1.6714224497158334e-05, | |
| "logits/chosen": -19.52318572998047, | |
| "logits/rejected": -22.174915313720703, | |
| "logps/chosen": -500.9818115234375, | |
| "logps/rejected": -427.1512451171875, | |
| "loss": 1.2191, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": 0.03365034982562065, | |
| "rewards/margins": 2.265183210372925, | |
| "rewards/rejected": -2.2315328121185303, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 0.8020324798246488, | |
| "grad_norm": 0.03890511766076088, | |
| "learning_rate": 1.667547376618785e-05, | |
| "logits/chosen": -19.749061584472656, | |
| "logits/rejected": -21.982847213745117, | |
| "logps/chosen": -521.017822265625, | |
| "logps/rejected": -434.4532165527344, | |
| "loss": 1.6337, | |
| "rewards/accuracies": 0.5600000023841858, | |
| "rewards/chosen": 1.2707146406173706, | |
| "rewards/margins": 2.021421432495117, | |
| "rewards/rejected": -0.7507067918777466, | |
| "step": 16100 | |
| }, | |
| { | |
| "epoch": 0.8070140480223175, | |
| "grad_norm": 7.803345680236816, | |
| "learning_rate": 1.6636541370534537e-05, | |
| "logits/chosen": -19.421598434448242, | |
| "logits/rejected": -20.04302406311035, | |
| "logps/chosen": -481.74334716796875, | |
| "logps/rejected": -437.2704162597656, | |
| "loss": 1.5953, | |
| "rewards/accuracies": 0.5799999833106995, | |
| "rewards/chosen": 1.658532738685608, | |
| "rewards/margins": 1.250680685043335, | |
| "rewards/rejected": 0.40785208344459534, | |
| "step": 16200 | |
| }, | |
| { | |
| "epoch": 0.8119956162199861, | |
| "grad_norm": 0.0178745836019516, | |
| "learning_rate": 1.6597428369694934e-05, | |
| "logits/chosen": -18.97728157043457, | |
| "logits/rejected": -20.238012313842773, | |
| "logps/chosen": -544.66015625, | |
| "logps/rejected": -475.9863586425781, | |
| "loss": 1.2509, | |
| "rewards/accuracies": 0.6600000262260437, | |
| "rewards/chosen": 1.6321719884872437, | |
| "rewards/margins": 1.595544695854187, | |
| "rewards/rejected": 0.03662717714905739, | |
| "step": 16300 | |
| }, | |
| { | |
| "epoch": 0.8169771844176547, | |
| "grad_norm": 115.91627502441406, | |
| "learning_rate": 1.655813582808051e-05, | |
| "logits/chosen": -19.31316566467285, | |
| "logits/rejected": -20.653505325317383, | |
| "logps/chosen": -523.737060546875, | |
| "logps/rejected": -466.0438232421875, | |
| "loss": 1.4616, | |
| "rewards/accuracies": 0.5600000023841858, | |
| "rewards/chosen": 1.028465986251831, | |
| "rewards/margins": 0.8755133152008057, | |
| "rewards/rejected": 0.15295258164405823, | |
| "step": 16400 | |
| }, | |
| { | |
| "epoch": 0.8219587526153233, | |
| "grad_norm": 19.2689266204834, | |
| "learning_rate": 1.651866481498873e-05, | |
| "logits/chosen": -19.784204483032227, | |
| "logits/rejected": -21.526857376098633, | |
| "logps/chosen": -484.1365966796875, | |
| "logps/rejected": -423.5486755371094, | |
| "loss": 1.0354, | |
| "rewards/accuracies": 0.6800000071525574, | |
| "rewards/chosen": 1.5439956188201904, | |
| "rewards/margins": 2.2780568599700928, | |
| "rewards/rejected": -0.7340614199638367, | |
| "step": 16500 | |
| }, | |
| { | |
| "epoch": 0.826940320812992, | |
| "grad_norm": 22.2554988861084, | |
| "learning_rate": 1.6479016404573916e-05, | |
| "logits/chosen": -19.650360107421875, | |
| "logits/rejected": -21.203350067138672, | |
| "logps/chosen": -505.99871826171875, | |
| "logps/rejected": -474.4420166015625, | |
| "loss": 1.6364, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.20957961678504944, | |
| "rewards/margins": 1.9317030906677246, | |
| "rewards/rejected": -2.141282558441162, | |
| "step": 16600 | |
| }, | |
| { | |
| "epoch": 0.8319218890106606, | |
| "grad_norm": 44.10676574707031, | |
| "learning_rate": 1.6439191675818056e-05, | |
| "logits/chosen": -19.524065017700195, | |
| "logits/rejected": -22.584871292114258, | |
| "logps/chosen": -469.15264892578125, | |
| "logps/rejected": -362.7622985839844, | |
| "loss": 0.9529, | |
| "rewards/accuracies": 0.6800000071525574, | |
| "rewards/chosen": -0.1507752537727356, | |
| "rewards/margins": 2.6078405380249023, | |
| "rewards/rejected": -2.7586159706115723, | |
| "step": 16700 | |
| }, | |
| { | |
| "epoch": 0.8369034572083291, | |
| "grad_norm": 143.54794311523438, | |
| "learning_rate": 1.6399191712501417e-05, | |
| "logits/chosen": -19.261682510375977, | |
| "logits/rejected": -21.39293670654297, | |
| "logps/chosen": -543.3509521484375, | |
| "logps/rejected": -472.142822265625, | |
| "loss": 1.4329, | |
| "rewards/accuracies": 0.6200000047683716, | |
| "rewards/chosen": 1.02738356590271, | |
| "rewards/margins": 1.6261159181594849, | |
| "rewards/rejected": -0.5987322926521301, | |
| "step": 16800 | |
| }, | |
| { | |
| "epoch": 0.8418850254059979, | |
| "grad_norm": 0.004016869701445103, | |
| "learning_rate": 1.6359017603173043e-05, | |
| "logits/chosen": -19.182754516601562, | |
| "logits/rejected": -21.46161651611328, | |
| "logps/chosen": -525.286376953125, | |
| "logps/rejected": -444.2633972167969, | |
| "loss": 1.361, | |
| "rewards/accuracies": 0.6600000262260437, | |
| "rewards/chosen": 0.7399688959121704, | |
| "rewards/margins": 2.1688730716705322, | |
| "rewards/rejected": -1.4289040565490723, | |
| "step": 16900 | |
| }, | |
| { | |
| "epoch": 0.8468665936036665, | |
| "grad_norm": 85.75580596923828, | |
| "learning_rate": 1.6318670441121157e-05, | |
| "logits/chosen": -19.6074161529541, | |
| "logits/rejected": -20.843006134033203, | |
| "logps/chosen": -514.9994506835938, | |
| "logps/rejected": -458.54803466796875, | |
| "loss": 1.9907, | |
| "rewards/accuracies": 0.5799999833106995, | |
| "rewards/chosen": -0.5896009802818298, | |
| "rewards/margins": 1.107519507408142, | |
| "rewards/rejected": -1.6971205472946167, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 0.851848161801335, | |
| "grad_norm": 0.005596471484750509, | |
| "learning_rate": 1.6278151324343395e-05, | |
| "logits/chosen": -18.718494415283203, | |
| "logits/rejected": -19.837169647216797, | |
| "logps/chosen": -527.9190673828125, | |
| "logps/rejected": -452.2406005859375, | |
| "loss": 0.9701, | |
| "rewards/accuracies": 0.6299999952316284, | |
| "rewards/chosen": -0.13445429503917694, | |
| "rewards/margins": 2.05269193649292, | |
| "rewards/rejected": -2.1871461868286133, | |
| "step": 17100 | |
| }, | |
| { | |
| "epoch": 0.8568297299990036, | |
| "grad_norm": 76.1462631225586, | |
| "learning_rate": 1.6237461355516918e-05, | |
| "logits/chosen": -18.651123046875, | |
| "logits/rejected": -19.52649688720703, | |
| "logps/chosen": -539.2634887695312, | |
| "logps/rejected": -478.9902648925781, | |
| "loss": 1.575, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": 0.8822598457336426, | |
| "rewards/margins": 1.2877403497695923, | |
| "rewards/rejected": -0.4054804742336273, | |
| "step": 17200 | |
| }, | |
| { | |
| "epoch": 0.8618112981966723, | |
| "grad_norm": 90.55237579345703, | |
| "learning_rate": 1.6196601641968425e-05, | |
| "logits/chosen": -18.68253517150879, | |
| "logits/rejected": -20.140342712402344, | |
| "logps/chosen": -523.9414672851562, | |
| "logps/rejected": -457.2998046875, | |
| "loss": 1.1014, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": 2.5566680431365967, | |
| "rewards/margins": 1.6561553478240967, | |
| "rewards/rejected": 0.9005125164985657, | |
| "step": 17300 | |
| }, | |
| { | |
| "epoch": 0.8667928663943409, | |
| "grad_norm": 3.8211495876312256, | |
| "learning_rate": 1.6155573295643993e-05, | |
| "logits/chosen": -19.115205764770508, | |
| "logits/rejected": -19.974811553955078, | |
| "logps/chosen": -527.80419921875, | |
| "logps/rejected": -491.970458984375, | |
| "loss": 1.5425, | |
| "rewards/accuracies": 0.6200000047683716, | |
| "rewards/chosen": 2.147320032119751, | |
| "rewards/margins": 0.9348466396331787, | |
| "rewards/rejected": 1.2124736309051514, | |
| "step": 17400 | |
| }, | |
| { | |
| "epoch": 0.8717744345920095, | |
| "grad_norm": 12.620223999023438, | |
| "learning_rate": 1.611437743307884e-05, | |
| "logits/chosen": -19.070911407470703, | |
| "logits/rejected": -20.162216186523438, | |
| "logps/chosen": -515.4886474609375, | |
| "logps/rejected": -440.5010070800781, | |
| "loss": 1.2029, | |
| "rewards/accuracies": 0.7200000286102295, | |
| "rewards/chosen": 2.0181946754455566, | |
| "rewards/margins": 1.9606748819351196, | |
| "rewards/rejected": 0.057520028203725815, | |
| "step": 17500 | |
| }, | |
| { | |
| "epoch": 0.8767560027896782, | |
| "grad_norm": 59.42154312133789, | |
| "learning_rate": 1.6073015175366914e-05, | |
| "logits/chosen": -18.614526748657227, | |
| "logits/rejected": -19.882549285888672, | |
| "logps/chosen": -515.3157958984375, | |
| "logps/rejected": -447.471435546875, | |
| "loss": 1.3499, | |
| "rewards/accuracies": 0.5600000023841858, | |
| "rewards/chosen": 2.700169563293457, | |
| "rewards/margins": 0.998585045337677, | |
| "rewards/rejected": 1.701583981513977, | |
| "step": 17600 | |
| }, | |
| { | |
| "epoch": 0.8817375709873468, | |
| "grad_norm": 66.42816925048828, | |
| "learning_rate": 1.603148764813042e-05, | |
| "logits/chosen": -18.400327682495117, | |
| "logits/rejected": -19.4934024810791, | |
| "logps/chosen": -477.818603515625, | |
| "logps/rejected": -401.1089782714844, | |
| "loss": 1.1853, | |
| "rewards/accuracies": 0.5799999833106995, | |
| "rewards/chosen": 3.3441994190216064, | |
| "rewards/margins": 1.493272066116333, | |
| "rewards/rejected": 1.8509272336959839, | |
| "step": 17700 | |
| }, | |
| { | |
| "epoch": 0.8867191391850154, | |
| "grad_norm": 0.004200187046080828, | |
| "learning_rate": 1.5989795981489155e-05, | |
| "logits/chosen": -18.308795928955078, | |
| "logits/rejected": -19.249317169189453, | |
| "logps/chosen": -512.5999755859375, | |
| "logps/rejected": -438.9172668457031, | |
| "loss": 1.1657, | |
| "rewards/accuracies": 0.6600000262260437, | |
| "rewards/chosen": 3.686070442199707, | |
| "rewards/margins": 2.335679292678833, | |
| "rewards/rejected": 1.3503910303115845, | |
| "step": 17800 | |
| }, | |
| { | |
| "epoch": 0.891700707382684, | |
| "grad_norm": 4.083571910858154, | |
| "learning_rate": 1.5947941310029755e-05, | |
| "logits/chosen": -18.304054260253906, | |
| "logits/rejected": -19.744394302368164, | |
| "logps/chosen": -475.91485595703125, | |
| "logps/rejected": -379.3888244628906, | |
| "loss": 1.1195, | |
| "rewards/accuracies": 0.6899999976158142, | |
| "rewards/chosen": 3.1028084754943848, | |
| "rewards/margins": 1.9994087219238281, | |
| "rewards/rejected": 1.1033999919891357, | |
| "step": 17900 | |
| }, | |
| { | |
| "epoch": 0.8966822755803527, | |
| "grad_norm": 51.022762298583984, | |
| "learning_rate": 1.5905924772774855e-05, | |
| "logits/chosen": -18.618383407592773, | |
| "logits/rejected": -19.63192367553711, | |
| "logps/chosen": -479.7633972167969, | |
| "logps/rejected": -392.1904602050781, | |
| "loss": 1.1562, | |
| "rewards/accuracies": 0.6800000071525574, | |
| "rewards/chosen": 2.1094658374786377, | |
| "rewards/margins": 1.7458257675170898, | |
| "rewards/rejected": 0.3636399209499359, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 0.9016638437780213, | |
| "grad_norm": 0.00022725010057911277, | |
| "learning_rate": 1.586374751315204e-05, | |
| "logits/chosen": -19.1419620513916, | |
| "logits/rejected": -20.252761840820312, | |
| "logps/chosen": -530.3628540039062, | |
| "logps/rejected": -461.61639404296875, | |
| "loss": 1.3509, | |
| "rewards/accuracies": 0.6299999952316284, | |
| "rewards/chosen": 3.2607533931732178, | |
| "rewards/margins": 1.972005009651184, | |
| "rewards/rejected": 1.2887482643127441, | |
| "step": 18100 | |
| }, | |
| { | |
| "epoch": 0.9066454119756899, | |
| "grad_norm": 120.93099212646484, | |
| "learning_rate": 1.5821410678962764e-05, | |
| "logits/chosen": -19.30841636657715, | |
| "logits/rejected": -20.281227111816406, | |
| "logps/chosen": -450.5699157714844, | |
| "logps/rejected": -398.7843017578125, | |
| "loss": 1.4407, | |
| "rewards/accuracies": 0.6100000143051147, | |
| "rewards/chosen": 2.621293067932129, | |
| "rewards/margins": 1.4806004762649536, | |
| "rewards/rejected": 1.1406925916671753, | |
| "step": 18200 | |
| }, | |
| { | |
| "epoch": 0.9116269801733585, | |
| "grad_norm": 0.014844976365566254, | |
| "learning_rate": 1.5778915422351102e-05, | |
| "logits/chosen": -18.85603141784668, | |
| "logits/rejected": -19.324058532714844, | |
| "logps/chosen": -493.2317199707031, | |
| "logps/rejected": -453.1470947265625, | |
| "loss": 0.913, | |
| "rewards/accuracies": 0.6700000166893005, | |
| "rewards/chosen": 4.14647102355957, | |
| "rewards/margins": 1.9552674293518066, | |
| "rewards/rejected": 2.1912038326263428, | |
| "step": 18300 | |
| }, | |
| { | |
| "epoch": 0.9166085483710272, | |
| "grad_norm": 90.2131576538086, | |
| "learning_rate": 1.5736262899772407e-05, | |
| "logits/chosen": -19.094078063964844, | |
| "logits/rejected": -20.168027877807617, | |
| "logps/chosen": -512.7498779296875, | |
| "logps/rejected": -448.6697692871094, | |
| "loss": 1.1494, | |
| "rewards/accuracies": 0.6100000143051147, | |
| "rewards/chosen": 2.183354139328003, | |
| "rewards/margins": 1.9273743629455566, | |
| "rewards/rejected": 0.2559796869754791, | |
| "step": 18400 | |
| }, | |
| { | |
| "epoch": 0.9215901165686958, | |
| "grad_norm": 0.30067598819732666, | |
| "learning_rate": 1.569345427196181e-05, | |
| "logits/chosen": -19.207042694091797, | |
| "logits/rejected": -20.777149200439453, | |
| "logps/chosen": -521.3990478515625, | |
| "logps/rejected": -436.8664855957031, | |
| "loss": 1.2902, | |
| "rewards/accuracies": 0.6299999952316284, | |
| "rewards/chosen": 2.0493509769439697, | |
| "rewards/margins": 1.9620305299758911, | |
| "rewards/rejected": 0.08732038736343384, | |
| "step": 18500 | |
| }, | |
| { | |
| "epoch": 0.9265716847663644, | |
| "grad_norm": 128.35350036621094, | |
| "learning_rate": 1.5650490703902666e-05, | |
| "logits/chosen": -19.485790252685547, | |
| "logits/rejected": -20.323007583618164, | |
| "logps/chosen": -496.7759094238281, | |
| "logps/rejected": -440.96759033203125, | |
| "loss": 1.4797, | |
| "rewards/accuracies": 0.5899999737739563, | |
| "rewards/chosen": 0.3512002229690552, | |
| "rewards/margins": 1.2790648937225342, | |
| "rewards/rejected": -0.927864670753479, | |
| "step": 18600 | |
| }, | |
| { | |
| "epoch": 0.9315532529640331, | |
| "grad_norm": 23.355682373046875, | |
| "learning_rate": 1.5607373364794836e-05, | |
| "logits/chosen": -19.615062713623047, | |
| "logits/rejected": -20.484060287475586, | |
| "logps/chosen": -479.2476806640625, | |
| "logps/rejected": -415.4735412597656, | |
| "loss": 1.2546, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": 1.3234963417053223, | |
| "rewards/margins": 2.4520487785339355, | |
| "rewards/rejected": -1.1285524368286133, | |
| "step": 18700 | |
| }, | |
| { | |
| "epoch": 0.9365348211617017, | |
| "grad_norm": 41.07186508178711, | |
| "learning_rate": 1.5564103428022855e-05, | |
| "logits/chosen": -19.126056671142578, | |
| "logits/rejected": -19.900794982910156, | |
| "logps/chosen": -530.3897094726562, | |
| "logps/rejected": -466.9271545410156, | |
| "loss": 1.1304, | |
| "rewards/accuracies": 0.6600000262260437, | |
| "rewards/chosen": 1.134138584136963, | |
| "rewards/margins": 2.5866689682006836, | |
| "rewards/rejected": -1.4525303840637207, | |
| "step": 18800 | |
| }, | |
| { | |
| "epoch": 0.9415163893593703, | |
| "grad_norm": 3.795400381088257, | |
| "learning_rate": 1.552068207112402e-05, | |
| "logits/chosen": -18.972984313964844, | |
| "logits/rejected": -19.319841384887695, | |
| "logps/chosen": -517.9902954101562, | |
| "logps/rejected": -463.4305725097656, | |
| "loss": 1.6333, | |
| "rewards/accuracies": 0.5799999833106995, | |
| "rewards/chosen": 0.04141408950090408, | |
| "rewards/margins": 1.2581309080123901, | |
| "rewards/rejected": -1.2167168855667114, | |
| "step": 18900 | |
| }, | |
| { | |
| "epoch": 0.9464979575570389, | |
| "grad_norm": 0.2743411362171173, | |
| "learning_rate": 1.547711047575635e-05, | |
| "logits/chosen": -18.696569442749023, | |
| "logits/rejected": -19.06745719909668, | |
| "logps/chosen": -540.3345947265625, | |
| "logps/rejected": -484.9985656738281, | |
| "loss": 1.6534, | |
| "rewards/accuracies": 0.6399999856948853, | |
| "rewards/chosen": 2.271423816680908, | |
| "rewards/margins": 1.7851731777191162, | |
| "rewards/rejected": 0.48625069856643677, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 0.9514795257547076, | |
| "grad_norm": 27.537288665771484, | |
| "learning_rate": 1.543338982766639e-05, | |
| "logits/chosen": -18.664011001586914, | |
| "logits/rejected": -19.458066940307617, | |
| "logps/chosen": -518.22021484375, | |
| "logps/rejected": -418.8706359863281, | |
| "loss": 1.0862, | |
| "rewards/accuracies": 0.6800000071525574, | |
| "rewards/chosen": 2.7771294116973877, | |
| "rewards/margins": 2.859891891479492, | |
| "rewards/rejected": -0.08276252448558807, | |
| "step": 19100 | |
| }, | |
| { | |
| "epoch": 0.9564610939523762, | |
| "grad_norm": 0.9008951783180237, | |
| "learning_rate": 1.5389521316656992e-05, | |
| "logits/chosen": -18.849185943603516, | |
| "logits/rejected": -19.619192123413086, | |
| "logps/chosen": -494.7686462402344, | |
| "logps/rejected": -408.1366882324219, | |
| "loss": 1.1158, | |
| "rewards/accuracies": 0.7300000190734863, | |
| "rewards/chosen": 2.1583621501922607, | |
| "rewards/margins": 2.396536350250244, | |
| "rewards/rejected": -0.2381744235754013, | |
| "step": 19200 | |
| }, | |
| { | |
| "epoch": 0.9614426621500448, | |
| "grad_norm": 0.1889144629240036, | |
| "learning_rate": 1.5345506136554898e-05, | |
| "logits/chosen": -18.629066467285156, | |
| "logits/rejected": -19.594579696655273, | |
| "logps/chosen": -524.626708984375, | |
| "logps/rejected": -428.97344970703125, | |
| "loss": 1.1882, | |
| "rewards/accuracies": 0.6100000143051147, | |
| "rewards/chosen": 2.8290598392486572, | |
| "rewards/margins": 2.019918203353882, | |
| "rewards/rejected": 0.8091418743133545, | |
| "step": 19300 | |
| }, | |
| { | |
| "epoch": 0.9664242303477134, | |
| "grad_norm": 1.7383246421813965, | |
| "learning_rate": 1.5301345485178282e-05, | |
| "logits/chosen": -18.85825538635254, | |
| "logits/rejected": -19.791507720947266, | |
| "logps/chosen": -480.490234375, | |
| "logps/rejected": -380.1109619140625, | |
| "loss": 1.2643, | |
| "rewards/accuracies": 0.6700000166893005, | |
| "rewards/chosen": 2.243396520614624, | |
| "rewards/margins": 1.7966461181640625, | |
| "rewards/rejected": 0.44675034284591675, | |
| "step": 19400 | |
| }, | |
| { | |
| "epoch": 0.9714057985453821, | |
| "grad_norm": 73.9409408569336, | |
| "learning_rate": 1.525704056430412e-05, | |
| "logits/chosen": -18.52286148071289, | |
| "logits/rejected": -18.89972496032715, | |
| "logps/chosen": -528.177001953125, | |
| "logps/rejected": -484.8839111328125, | |
| "loss": 1.5849, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": 2.4241716861724854, | |
| "rewards/margins": 1.1765490770339966, | |
| "rewards/rejected": 1.2476229667663574, | |
| "step": 19500 | |
| }, | |
| { | |
| "epoch": 0.9763873667430507, | |
| "grad_norm": 34.82683563232422, | |
| "learning_rate": 1.5212592579635512e-05, | |
| "logits/chosen": -18.213794708251953, | |
| "logits/rejected": -19.081279754638672, | |
| "logps/chosen": -520.0574340820312, | |
| "logps/rejected": -446.2875061035156, | |
| "loss": 1.171, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": 1.9521498680114746, | |
| "rewards/margins": 1.636811375617981, | |
| "rewards/rejected": 0.315338671207428, | |
| "step": 19600 | |
| }, | |
| { | |
| "epoch": 0.9813689349407193, | |
| "grad_norm": 8.361115455627441, | |
| "learning_rate": 1.5168002740768857e-05, | |
| "logits/chosen": -18.713205337524414, | |
| "logits/rejected": -19.391826629638672, | |
| "logps/chosen": -503.5995178222656, | |
| "logps/rejected": -472.6417236328125, | |
| "loss": 1.1879, | |
| "rewards/accuracies": 0.6399999856948853, | |
| "rewards/chosen": 1.9274494647979736, | |
| "rewards/margins": 1.9319396018981934, | |
| "rewards/rejected": -0.00449012778699398, | |
| "step": 19700 | |
| }, | |
| { | |
| "epoch": 0.986350503138388, | |
| "grad_norm": 0.049951426684856415, | |
| "learning_rate": 1.512327226116094e-05, | |
| "logits/chosen": -19.126710891723633, | |
| "logits/rejected": -20.143205642700195, | |
| "logps/chosen": -521.0743408203125, | |
| "logps/rejected": -409.7085876464844, | |
| "loss": 1.2551, | |
| "rewards/accuracies": 0.6299999952316284, | |
| "rewards/chosen": 1.6364871263504028, | |
| "rewards/margins": 2.0774831771850586, | |
| "rewards/rejected": -0.44099605083465576, | |
| "step": 19800 | |
| }, | |
| { | |
| "epoch": 0.9913320713360566, | |
| "grad_norm": 0.5147112011909485, | |
| "learning_rate": 1.507840235809591e-05, | |
| "logits/chosen": -18.839317321777344, | |
| "logits/rejected": -19.660390853881836, | |
| "logps/chosen": -511.8052062988281, | |
| "logps/rejected": -445.6004638671875, | |
| "loss": 1.422, | |
| "rewards/accuracies": 0.6299999952316284, | |
| "rewards/chosen": 1.8398367166519165, | |
| "rewards/margins": 1.5229132175445557, | |
| "rewards/rejected": 0.3169235587120056, | |
| "step": 19900 | |
| }, | |
| { | |
| "epoch": 0.9963136395337252, | |
| "grad_norm": 19.235801696777344, | |
| "learning_rate": 1.503339425265215e-05, | |
| "logits/chosen": -18.8520450592041, | |
| "logits/rejected": -19.381229400634766, | |
| "logps/chosen": -479.9813537597656, | |
| "logps/rejected": -451.1719970703125, | |
| "loss": 1.5368, | |
| "rewards/accuracies": 0.6100000143051147, | |
| "rewards/chosen": 1.7022852897644043, | |
| "rewards/margins": 1.5142347812652588, | |
| "rewards/rejected": 0.1880505383014679, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 0.9963136395337252, | |
| "eval_logits/chosen": -19.80689811706543, | |
| "eval_logits/rejected": -20.71694564819336, | |
| "eval_logps/chosen": -488.4328308105469, | |
| "eval_logps/rejected": -420.06622314453125, | |
| "eval_loss": 1.361470103263855, | |
| "eval_rewards/accuracies": 0.6511780023574829, | |
| "eval_rewards/chosen": 3.077153444290161, | |
| "eval_rewards/margins": 2.0428242683410645, | |
| "eval_rewards/rejected": 1.0343292951583862, | |
| "eval_runtime": 472.868, | |
| "eval_samples_per_second": 3.219, | |
| "eval_steps_per_second": 0.404, | |
| "step": 20000 | |
| } | |
| ], | |
| "logging_steps": 100, | |
| "max_steps": 60222, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 500, | |
| "total_flos": 0.0, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |