| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 1.0, | |
| "eval_steps": 100, | |
| "global_step": 2471, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0004046944556859571, | |
| "grad_norm": 3260930.195416938, | |
| "learning_rate": 2.0161290322580643e-09, | |
| "logits/chosen": -2.216688871383667, | |
| "logits/rejected": -2.1725575923919678, | |
| "logps/chosen": -62.37783432006836, | |
| "logps/rejected": -57.61228561401367, | |
| "loss": 137728.9531, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.004046944556859571, | |
| "grad_norm": 3951641.9256235515, | |
| "learning_rate": 2.0161290322580644e-08, | |
| "logits/chosen": -2.3231096267700195, | |
| "logits/rejected": -2.3050363063812256, | |
| "logps/chosen": -109.29280090332031, | |
| "logps/rejected": -105.20187377929688, | |
| "loss": 128824.3056, | |
| "rewards/accuracies": 0.4027777910232544, | |
| "rewards/chosen": -8.181909652194008e-05, | |
| "rewards/margins": -8.144730236381292e-05, | |
| "rewards/rejected": -3.7179981404733553e-07, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.008093889113719142, | |
| "grad_norm": 3636837.085798033, | |
| "learning_rate": 4.032258064516129e-08, | |
| "logits/chosen": -2.3102259635925293, | |
| "logits/rejected": -2.3181633949279785, | |
| "logps/chosen": -102.9901351928711, | |
| "logps/rejected": -103.0818099975586, | |
| "loss": 128439.1625, | |
| "rewards/accuracies": 0.512499988079071, | |
| "rewards/chosen": -3.3925286970770685e-06, | |
| "rewards/margins": -7.394707154162461e-06, | |
| "rewards/rejected": 4.00217959395377e-06, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.012140833670578713, | |
| "grad_norm": 4189016.609602417, | |
| "learning_rate": 6.048387096774194e-08, | |
| "logits/chosen": -2.2731196880340576, | |
| "logits/rejected": -2.261061191558838, | |
| "logps/chosen": -104.67350769042969, | |
| "logps/rejected": -116.59749603271484, | |
| "loss": 124740.475, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -0.00028529245173558593, | |
| "rewards/margins": 2.6274694391759112e-05, | |
| "rewards/rejected": -0.00031156709883362055, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.016187778227438283, | |
| "grad_norm": 3141568.670348898, | |
| "learning_rate": 8.064516129032257e-08, | |
| "logits/chosen": -2.3156943321228027, | |
| "logits/rejected": -2.294349193572998, | |
| "logps/chosen": -129.86062622070312, | |
| "logps/rejected": -117.5326156616211, | |
| "loss": 131411.2, | |
| "rewards/accuracies": 0.4375, | |
| "rewards/chosen": -0.00011367227125447243, | |
| "rewards/margins": -1.9382667233003303e-05, | |
| "rewards/rejected": -9.428960038349032e-05, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.020234722784297856, | |
| "grad_norm": 4300244.422627452, | |
| "learning_rate": 1.0080645161290321e-07, | |
| "logits/chosen": -2.271444320678711, | |
| "logits/rejected": -2.2707998752593994, | |
| "logps/chosen": -107.74246978759766, | |
| "logps/rejected": -112.56591796875, | |
| "loss": 128522.9375, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -0.0001433270808774978, | |
| "rewards/margins": 8.896701183402911e-05, | |
| "rewards/rejected": -0.00023229411453939974, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.024281667341157425, | |
| "grad_norm": 4087404.2243083506, | |
| "learning_rate": 1.2096774193548387e-07, | |
| "logits/chosen": -2.2509658336639404, | |
| "logits/rejected": -2.235924005508423, | |
| "logps/chosen": -98.1602783203125, | |
| "logps/rejected": -97.8387222290039, | |
| "loss": 134684.625, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -3.988773187302286e-06, | |
| "rewards/margins": 0.0002523847797419876, | |
| "rewards/rejected": -0.00025637357612140477, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.028328611898016998, | |
| "grad_norm": 3240131.8848123536, | |
| "learning_rate": 1.4112903225806453e-07, | |
| "logits/chosen": -2.3215599060058594, | |
| "logits/rejected": -2.3164916038513184, | |
| "logps/chosen": -113.9156265258789, | |
| "logps/rejected": -114.72650146484375, | |
| "loss": 127554.8875, | |
| "rewards/accuracies": 0.42500001192092896, | |
| "rewards/chosen": 0.0005882935947738588, | |
| "rewards/margins": -0.0001803641061997041, | |
| "rewards/rejected": 0.000768657773733139, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.03237555645487657, | |
| "grad_norm": 4463707.543855141, | |
| "learning_rate": 1.6129032258064515e-07, | |
| "logits/chosen": -2.197829246520996, | |
| "logits/rejected": -2.2096786499023438, | |
| "logps/chosen": -99.81291198730469, | |
| "logps/rejected": -96.83836364746094, | |
| "loss": 129532.875, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": 3.9735146856401116e-05, | |
| "rewards/margins": 0.00038970523746684194, | |
| "rewards/rejected": -0.0003499701269902289, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.036422501011736136, | |
| "grad_norm": 5504977.483755038, | |
| "learning_rate": 1.814516129032258e-07, | |
| "logits/chosen": -2.2197558879852295, | |
| "logits/rejected": -2.200068712234497, | |
| "logps/chosen": -112.21453857421875, | |
| "logps/rejected": -110.07649993896484, | |
| "loss": 132607.275, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -0.00027725097606889904, | |
| "rewards/margins": 0.00037192669697105885, | |
| "rewards/rejected": -0.0006491777021437883, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.04046944556859571, | |
| "grad_norm": 3275423.408726695, | |
| "learning_rate": 2.0161290322580642e-07, | |
| "logits/chosen": -2.2803494930267334, | |
| "logits/rejected": -2.277498245239258, | |
| "logps/chosen": -118.47029876708984, | |
| "logps/rejected": -121.81834411621094, | |
| "loss": 129364.775, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": 0.00027665990637615323, | |
| "rewards/margins": 0.0005457916995510459, | |
| "rewards/rejected": -0.00026913188048638403, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.04451639012545528, | |
| "grad_norm": 3537595.5413078354, | |
| "learning_rate": 2.2177419354838707e-07, | |
| "logits/chosen": -2.2598938941955566, | |
| "logits/rejected": -2.243565797805786, | |
| "logps/chosen": -123.01219177246094, | |
| "logps/rejected": -127.5718994140625, | |
| "loss": 128605.475, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -0.001143028261139989, | |
| "rewards/margins": 0.0005904460558667779, | |
| "rewards/rejected": -0.0017334744334220886, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.04856333468231485, | |
| "grad_norm": 4700408.411362441, | |
| "learning_rate": 2.4193548387096775e-07, | |
| "logits/chosen": -2.189763307571411, | |
| "logits/rejected": -2.230834484100342, | |
| "logps/chosen": -111.58349609375, | |
| "logps/rejected": -116.3633041381836, | |
| "loss": 132832.7375, | |
| "rewards/accuracies": 0.48750001192092896, | |
| "rewards/chosen": -0.0004227511235512793, | |
| "rewards/margins": -0.00036064969026483595, | |
| "rewards/rejected": -6.210146966623142e-05, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.052610279239174426, | |
| "grad_norm": 4839768.05175113, | |
| "learning_rate": 2.6209677419354835e-07, | |
| "logits/chosen": -2.172719717025757, | |
| "logits/rejected": -2.154069423675537, | |
| "logps/chosen": -131.54049682617188, | |
| "logps/rejected": -127.31382751464844, | |
| "loss": 126528.7375, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -0.0012601370690390468, | |
| "rewards/margins": 0.0014206544728949666, | |
| "rewards/rejected": -0.0026807915419340134, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.056657223796033995, | |
| "grad_norm": 4462123.98520813, | |
| "learning_rate": 2.8225806451612905e-07, | |
| "logits/chosen": -2.2781708240509033, | |
| "logits/rejected": -2.2529187202453613, | |
| "logps/chosen": -109.3487319946289, | |
| "logps/rejected": -108.7385025024414, | |
| "loss": 128939.875, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -0.0024039470590651035, | |
| "rewards/margins": 0.0019583911634981632, | |
| "rewards/rejected": -0.004362338222563267, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.060704168352893564, | |
| "grad_norm": 4413918.737440498, | |
| "learning_rate": 3.0241935483870965e-07, | |
| "logits/chosen": -2.0262560844421387, | |
| "logits/rejected": -2.0333077907562256, | |
| "logps/chosen": -115.6955337524414, | |
| "logps/rejected": -129.337890625, | |
| "loss": 125950.125, | |
| "rewards/accuracies": 0.4124999940395355, | |
| "rewards/chosen": -3.777583970077103e-06, | |
| "rewards/margins": -0.00044618500396609306, | |
| "rewards/rejected": 0.0004424075596034527, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.06475111290975313, | |
| "grad_norm": 4956705.44191673, | |
| "learning_rate": 3.225806451612903e-07, | |
| "logits/chosen": -2.127880096435547, | |
| "logits/rejected": -2.081531524658203, | |
| "logps/chosen": -115.7586669921875, | |
| "logps/rejected": -115.57160949707031, | |
| "loss": 127159.3875, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.0029864097014069557, | |
| "rewards/margins": 0.0028749522753059864, | |
| "rewards/rejected": -0.005861361511051655, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.0687980574666127, | |
| "grad_norm": 5249631.129700843, | |
| "learning_rate": 3.4274193548387095e-07, | |
| "logits/chosen": -1.924232840538025, | |
| "logits/rejected": -1.9467108249664307, | |
| "logps/chosen": -130.4487762451172, | |
| "logps/rejected": -133.85560607910156, | |
| "loss": 125375.3875, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -0.008038095198571682, | |
| "rewards/margins": 0.002187486505135894, | |
| "rewards/rejected": -0.01022558193653822, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.07284500202347227, | |
| "grad_norm": 4122924.0724422527, | |
| "learning_rate": 3.629032258064516e-07, | |
| "logits/chosen": -2.045342445373535, | |
| "logits/rejected": -2.0415282249450684, | |
| "logps/chosen": -118.37638854980469, | |
| "logps/rejected": -112.38387298583984, | |
| "loss": 126785.075, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": -0.006844646297395229, | |
| "rewards/margins": 0.0005232656258158386, | |
| "rewards/rejected": -0.00736791267991066, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.07689194658033185, | |
| "grad_norm": 4041640.919843376, | |
| "learning_rate": 3.8306451612903225e-07, | |
| "logits/chosen": -2.0350680351257324, | |
| "logits/rejected": -2.0383336544036865, | |
| "logps/chosen": -96.37462615966797, | |
| "logps/rejected": -109.77508544921875, | |
| "loss": 123590.025, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -0.005209728144109249, | |
| "rewards/margins": 0.0012681197840720415, | |
| "rewards/rejected": -0.006477847695350647, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.08093889113719142, | |
| "grad_norm": 5009394.271837804, | |
| "learning_rate": 4.0322580645161285e-07, | |
| "logits/chosen": -2.0026402473449707, | |
| "logits/rejected": -1.9795843362808228, | |
| "logps/chosen": -111.41777038574219, | |
| "logps/rejected": -113.9148178100586, | |
| "loss": 126448.05, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -0.010595456697046757, | |
| "rewards/margins": 0.003854970680549741, | |
| "rewards/rejected": -0.01445042621344328, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.08498583569405099, | |
| "grad_norm": 5619272.0636549145, | |
| "learning_rate": 4.2338709677419355e-07, | |
| "logits/chosen": -2.1407713890075684, | |
| "logits/rejected": -2.1545071601867676, | |
| "logps/chosen": -110.00148010253906, | |
| "logps/rejected": -112.6539077758789, | |
| "loss": 128766.15, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -0.009294772520661354, | |
| "rewards/margins": 0.0027922452427446842, | |
| "rewards/rejected": -0.012087016366422176, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.08903278025091056, | |
| "grad_norm": 4415731.805933493, | |
| "learning_rate": 4.4354838709677415e-07, | |
| "logits/chosen": -2.3430728912353516, | |
| "logits/rejected": -2.3087539672851562, | |
| "logps/chosen": -131.23556518554688, | |
| "logps/rejected": -134.5377655029297, | |
| "loss": 132823.0125, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.007352087646722794, | |
| "rewards/margins": 0.004128883592784405, | |
| "rewards/rejected": -0.011480971239507198, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.09307972480777013, | |
| "grad_norm": 5355155.642181672, | |
| "learning_rate": 4.637096774193548e-07, | |
| "logits/chosen": -2.242619752883911, | |
| "logits/rejected": -2.2341275215148926, | |
| "logps/chosen": -127.72953033447266, | |
| "logps/rejected": -131.52122497558594, | |
| "loss": 126450.9125, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": -0.01162335742264986, | |
| "rewards/margins": 0.005030062980949879, | |
| "rewards/rejected": -0.01665342040359974, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.0971266693646297, | |
| "grad_norm": 4222976.278932744, | |
| "learning_rate": 4.838709677419355e-07, | |
| "logits/chosen": -2.2030246257781982, | |
| "logits/rejected": -2.2015702724456787, | |
| "logps/chosen": -108.4349365234375, | |
| "logps/rejected": -110.720703125, | |
| "loss": 133174.225, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": -0.009825309738516808, | |
| "rewards/margins": 0.003020837437361479, | |
| "rewards/rejected": -0.012846146710216999, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.10117361392148927, | |
| "grad_norm": 5332420.53657513, | |
| "learning_rate": 4.995501574448943e-07, | |
| "logits/chosen": -2.1023497581481934, | |
| "logits/rejected": -2.1095871925354004, | |
| "logps/chosen": -110.6066665649414, | |
| "logps/rejected": -117.8967056274414, | |
| "loss": 127655.45, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -0.006971948780119419, | |
| "rewards/margins": 0.0035569421015679836, | |
| "rewards/rejected": -0.010528890416026115, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.10522055847834885, | |
| "grad_norm": 5679375.003121498, | |
| "learning_rate": 4.973009446693657e-07, | |
| "logits/chosen": -2.199481964111328, | |
| "logits/rejected": -2.18941330909729, | |
| "logps/chosen": -117.3616943359375, | |
| "logps/rejected": -118.54112243652344, | |
| "loss": 132409.6875, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": -0.013199470937252045, | |
| "rewards/margins": 0.001887517748400569, | |
| "rewards/rejected": -0.015086987987160683, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.10926750303520842, | |
| "grad_norm": 4610431.437626582, | |
| "learning_rate": 4.950517318938372e-07, | |
| "logits/chosen": -2.3225109577178955, | |
| "logits/rejected": -2.3390707969665527, | |
| "logps/chosen": -124.8027572631836, | |
| "logps/rejected": -129.42922973632812, | |
| "loss": 125030.1375, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": -0.012582078576087952, | |
| "rewards/margins": 0.0034210742451250553, | |
| "rewards/rejected": -0.01600315235555172, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.11331444759206799, | |
| "grad_norm": 6556255.599818757, | |
| "learning_rate": 4.928025191183086e-07, | |
| "logits/chosen": -2.1859679222106934, | |
| "logits/rejected": -2.2081589698791504, | |
| "logps/chosen": -114.65242767333984, | |
| "logps/rejected": -124.5443344116211, | |
| "loss": 124704.9, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -0.016057247295975685, | |
| "rewards/margins": 0.002059857128188014, | |
| "rewards/rejected": -0.018117103725671768, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.11736139214892756, | |
| "grad_norm": 6805409.426599127, | |
| "learning_rate": 4.9055330634278e-07, | |
| "logits/chosen": -2.219308853149414, | |
| "logits/rejected": -2.214458465576172, | |
| "logps/chosen": -134.7147979736328, | |
| "logps/rejected": -142.11083984375, | |
| "loss": 127160.525, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -0.009862681850790977, | |
| "rewards/margins": 0.005548264365643263, | |
| "rewards/rejected": -0.015410944819450378, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.12140833670578713, | |
| "grad_norm": 6068819.6586095495, | |
| "learning_rate": 4.883040935672515e-07, | |
| "logits/chosen": -2.258293390274048, | |
| "logits/rejected": -2.228738307952881, | |
| "logps/chosen": -132.33441162109375, | |
| "logps/rejected": -141.69737243652344, | |
| "loss": 128142.7625, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -0.008503363467752934, | |
| "rewards/margins": 0.007645074278116226, | |
| "rewards/rejected": -0.016148436814546585, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.1254552812626467, | |
| "grad_norm": 5267448.920762056, | |
| "learning_rate": 4.860548807917229e-07, | |
| "logits/chosen": -2.2171027660369873, | |
| "logits/rejected": -2.2104790210723877, | |
| "logps/chosen": -125.05142974853516, | |
| "logps/rejected": -133.34071350097656, | |
| "loss": 125674.1, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.019455790519714355, | |
| "rewards/margins": 0.0074443453922867775, | |
| "rewards/rejected": -0.026900136843323708, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.12950222581950627, | |
| "grad_norm": 6667685.083680488, | |
| "learning_rate": 4.838056680161944e-07, | |
| "logits/chosen": -2.1860244274139404, | |
| "logits/rejected": -2.2035775184631348, | |
| "logps/chosen": -122.4665756225586, | |
| "logps/rejected": -132.46490478515625, | |
| "loss": 125480.4125, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.017699861899018288, | |
| "rewards/margins": 0.006000404246151447, | |
| "rewards/rejected": -0.02370026707649231, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.13354917037636585, | |
| "grad_norm": 6064623.088294091, | |
| "learning_rate": 4.815564552406658e-07, | |
| "logits/chosen": -2.0421011447906494, | |
| "logits/rejected": -2.057572603225708, | |
| "logps/chosen": -134.10183715820312, | |
| "logps/rejected": -144.3116912841797, | |
| "loss": 124604.7875, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.019064148887991905, | |
| "rewards/margins": 0.006390860769897699, | |
| "rewards/rejected": -0.025455012917518616, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.1375961149332254, | |
| "grad_norm": 12560788.46443257, | |
| "learning_rate": 4.793072424651372e-07, | |
| "logits/chosen": -1.9278684854507446, | |
| "logits/rejected": -1.909166693687439, | |
| "logps/chosen": -146.60585021972656, | |
| "logps/rejected": -166.07937622070312, | |
| "loss": 140379.8375, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.0366508811712265, | |
| "rewards/margins": 0.013624541461467743, | |
| "rewards/rejected": -0.050275422632694244, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.141643059490085, | |
| "grad_norm": 6487628.638191885, | |
| "learning_rate": 4.770580296896087e-07, | |
| "logits/chosen": -2.11842679977417, | |
| "logits/rejected": -2.1011595726013184, | |
| "logps/chosen": -119.56195068359375, | |
| "logps/rejected": -136.84823608398438, | |
| "loss": 130511.3625, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -0.0215927604585886, | |
| "rewards/margins": 0.007697033230215311, | |
| "rewards/rejected": -0.029289793223142624, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.14569000404694454, | |
| "grad_norm": 4799050.3946148325, | |
| "learning_rate": 4.7480881691408005e-07, | |
| "logits/chosen": -2.0867960453033447, | |
| "logits/rejected": -2.082698345184326, | |
| "logps/chosen": -128.99429321289062, | |
| "logps/rejected": -130.89785766601562, | |
| "loss": 127926.0, | |
| "rewards/accuracies": 0.5249999761581421, | |
| "rewards/chosen": -0.016279883682727814, | |
| "rewards/margins": 0.0010558776557445526, | |
| "rewards/rejected": -0.017335761338472366, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.14973694860380413, | |
| "grad_norm": 5249060.947314388, | |
| "learning_rate": 4.725596041385515e-07, | |
| "logits/chosen": -2.1251657009124756, | |
| "logits/rejected": -2.1052744388580322, | |
| "logps/chosen": -121.3799819946289, | |
| "logps/rejected": -121.27701568603516, | |
| "loss": 131676.2375, | |
| "rewards/accuracies": 0.5249999761581421, | |
| "rewards/chosen": -0.017586207017302513, | |
| "rewards/margins": 0.003813033225014806, | |
| "rewards/rejected": -0.0213992390781641, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.1537838931606637, | |
| "grad_norm": 5293517.066249103, | |
| "learning_rate": 4.7031039136302294e-07, | |
| "logits/chosen": -2.15531587600708, | |
| "logits/rejected": -2.153560161590576, | |
| "logps/chosen": -159.96005249023438, | |
| "logps/rejected": -153.87448120117188, | |
| "loss": 121504.05, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -0.017819028347730637, | |
| "rewards/margins": 0.008313515223562717, | |
| "rewards/rejected": -0.02613254450261593, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.15783083771752326, | |
| "grad_norm": 5270002.835932803, | |
| "learning_rate": 4.6806117858749433e-07, | |
| "logits/chosen": -2.1870741844177246, | |
| "logits/rejected": -2.171494483947754, | |
| "logps/chosen": -148.86929321289062, | |
| "logps/rejected": -164.97915649414062, | |
| "loss": 129892.05, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": -0.02027943730354309, | |
| "rewards/margins": 0.0176930520683527, | |
| "rewards/rejected": -0.03797249123454094, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.16187778227438285, | |
| "grad_norm": 4898003.511863262, | |
| "learning_rate": 4.658119658119658e-07, | |
| "logits/chosen": -2.1435184478759766, | |
| "logits/rejected": -2.148679256439209, | |
| "logps/chosen": -128.7902069091797, | |
| "logps/rejected": -139.18150329589844, | |
| "loss": 122692.925, | |
| "rewards/accuracies": 0.5375000238418579, | |
| "rewards/chosen": -0.018103724345564842, | |
| "rewards/margins": 0.006611344870179892, | |
| "rewards/rejected": -0.02471506968140602, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.1659247268312424, | |
| "grad_norm": 4183644.18979808, | |
| "learning_rate": 4.635627530364372e-07, | |
| "logits/chosen": -2.150381565093994, | |
| "logits/rejected": -2.154317617416382, | |
| "logps/chosen": -108.93717193603516, | |
| "logps/rejected": -118.07032775878906, | |
| "loss": 126758.0375, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -0.021253790706396103, | |
| "rewards/margins": 0.003508577588945627, | |
| "rewards/rejected": -0.024762369692325592, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.16997167138810199, | |
| "grad_norm": 5593714.467836783, | |
| "learning_rate": 4.6131354026090867e-07, | |
| "logits/chosen": -2.180170774459839, | |
| "logits/rejected": -2.1523594856262207, | |
| "logps/chosen": -126.38621520996094, | |
| "logps/rejected": -136.35755920410156, | |
| "loss": 121196.2, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.01847546175122261, | |
| "rewards/margins": 0.0067595853470265865, | |
| "rewards/rejected": -0.025235047563910484, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.17401861594496154, | |
| "grad_norm": 3566616.7901411816, | |
| "learning_rate": 4.590643274853801e-07, | |
| "logits/chosen": -2.120450258255005, | |
| "logits/rejected": -2.150542974472046, | |
| "logps/chosen": -137.63836669921875, | |
| "logps/rejected": -141.17825317382812, | |
| "loss": 132284.5875, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -0.02999441884458065, | |
| "rewards/margins": 0.0025890106335282326, | |
| "rewards/rejected": -0.03258342668414116, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.17806556050182112, | |
| "grad_norm": 6039791.29116107, | |
| "learning_rate": 4.568151147098515e-07, | |
| "logits/chosen": -2.2097067832946777, | |
| "logits/rejected": -2.1825873851776123, | |
| "logps/chosen": -127.94209289550781, | |
| "logps/rejected": -137.39776611328125, | |
| "loss": 128589.475, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.023606717586517334, | |
| "rewards/margins": 0.009609794244170189, | |
| "rewards/rejected": -0.03321651369333267, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.1821125050586807, | |
| "grad_norm": 6343148.886392033, | |
| "learning_rate": 4.54565901934323e-07, | |
| "logits/chosen": -2.1717894077301025, | |
| "logits/rejected": -2.2131998538970947, | |
| "logps/chosen": -129.89688110351562, | |
| "logps/rejected": -145.33839416503906, | |
| "loss": 124381.275, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.020499037578701973, | |
| "rewards/margins": 0.013838306069374084, | |
| "rewards/rejected": -0.03433733806014061, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.18615944961554026, | |
| "grad_norm": 4981408.5070092585, | |
| "learning_rate": 4.523166891587944e-07, | |
| "logits/chosen": -2.2632086277008057, | |
| "logits/rejected": -2.306267738342285, | |
| "logps/chosen": -163.80706787109375, | |
| "logps/rejected": -155.72915649414062, | |
| "loss": 158881.6375, | |
| "rewards/accuracies": 0.48750001192092896, | |
| "rewards/chosen": -0.03987263888120651, | |
| "rewards/margins": -0.008330432698130608, | |
| "rewards/rejected": -0.03154221177101135, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.19020639417239985, | |
| "grad_norm": 6186406.38261752, | |
| "learning_rate": 4.500674763832658e-07, | |
| "logits/chosen": -2.4067013263702393, | |
| "logits/rejected": -2.4073116779327393, | |
| "logps/chosen": -123.8814697265625, | |
| "logps/rejected": -133.23178100585938, | |
| "loss": 129765.4625, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -0.01776200719177723, | |
| "rewards/margins": 0.006926923058927059, | |
| "rewards/rejected": -0.024688933044672012, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.1942533387292594, | |
| "grad_norm": 7924184.670127909, | |
| "learning_rate": 4.478182636077373e-07, | |
| "logits/chosen": -2.4064009189605713, | |
| "logits/rejected": -2.3933303356170654, | |
| "logps/chosen": -120.53520202636719, | |
| "logps/rejected": -124.30986022949219, | |
| "loss": 127188.5875, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.013866530731320381, | |
| "rewards/margins": 0.0045671057887375355, | |
| "rewards/rejected": -0.01843363419175148, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.19830028328611898, | |
| "grad_norm": 6796881.168124855, | |
| "learning_rate": 4.455690508322087e-07, | |
| "logits/chosen": -2.35581636428833, | |
| "logits/rejected": -2.276433229446411, | |
| "logps/chosen": -113.40742492675781, | |
| "logps/rejected": -126.89019775390625, | |
| "loss": 122585.6875, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -0.015190203674137592, | |
| "rewards/margins": 0.010421663522720337, | |
| "rewards/rejected": -0.025611868128180504, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.20234722784297854, | |
| "grad_norm": 9409785.188721178, | |
| "learning_rate": 4.433198380566802e-07, | |
| "logits/chosen": -2.200453519821167, | |
| "logits/rejected": -2.2011332511901855, | |
| "logps/chosen": -156.01809692382812, | |
| "logps/rejected": -169.92514038085938, | |
| "loss": 129704.3, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -0.025881418958306313, | |
| "rewards/margins": 0.010276483371853828, | |
| "rewards/rejected": -0.03615789860486984, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.20639417239983812, | |
| "grad_norm": 5757712.5781175345, | |
| "learning_rate": 4.410706252811516e-07, | |
| "logits/chosen": -2.127547025680542, | |
| "logits/rejected": -2.1388392448425293, | |
| "logps/chosen": -130.27249145507812, | |
| "logps/rejected": -145.90647888183594, | |
| "loss": 123361.8125, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.03281703591346741, | |
| "rewards/margins": 0.009785661473870277, | |
| "rewards/rejected": -0.042602695524692535, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.2104411169566977, | |
| "grad_norm": 5742087.523014036, | |
| "learning_rate": 4.3882141250562297e-07, | |
| "logits/chosen": -2.2757978439331055, | |
| "logits/rejected": -2.2460601329803467, | |
| "logps/chosen": -153.6471710205078, | |
| "logps/rejected": -165.54989624023438, | |
| "loss": 127158.9125, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -0.02743702568113804, | |
| "rewards/margins": 0.017125947400927544, | |
| "rewards/rejected": -0.04456297308206558, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.21448806151355726, | |
| "grad_norm": 6000988.402036818, | |
| "learning_rate": 4.3657219973009447e-07, | |
| "logits/chosen": -2.14945387840271, | |
| "logits/rejected": -2.160613775253296, | |
| "logps/chosen": -152.8687286376953, | |
| "logps/rejected": -157.02215576171875, | |
| "loss": 130855.4125, | |
| "rewards/accuracies": 0.48750001192092896, | |
| "rewards/chosen": -0.03712679445743561, | |
| "rewards/margins": -0.002356339478865266, | |
| "rewards/rejected": -0.03477045148611069, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.21853500607041684, | |
| "grad_norm": 7039581.88958706, | |
| "learning_rate": 4.3432298695456586e-07, | |
| "logits/chosen": -2.1952900886535645, | |
| "logits/rejected": -2.125767946243286, | |
| "logps/chosen": -121.56607818603516, | |
| "logps/rejected": -136.8863983154297, | |
| "loss": 124032.45, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.021455224603414536, | |
| "rewards/margins": 0.01183997467160225, | |
| "rewards/rejected": -0.03329520300030708, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.2225819506272764, | |
| "grad_norm": 6851510.087607766, | |
| "learning_rate": 4.3207377417903736e-07, | |
| "logits/chosen": -2.3099186420440674, | |
| "logits/rejected": -2.2750840187072754, | |
| "logps/chosen": -133.94058227539062, | |
| "logps/rejected": -165.82687377929688, | |
| "loss": 127159.35, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -0.02089579775929451, | |
| "rewards/margins": 0.011938202194869518, | |
| "rewards/rejected": -0.0328340008854866, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.22662889518413598, | |
| "grad_norm": 7651455.0301742535, | |
| "learning_rate": 4.2982456140350876e-07, | |
| "logits/chosen": -2.281270980834961, | |
| "logits/rejected": -2.291888475418091, | |
| "logps/chosen": -139.83163452148438, | |
| "logps/rejected": -141.5286865234375, | |
| "loss": 130547.225, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -0.02193923108279705, | |
| "rewards/margins": 0.0074460976757109165, | |
| "rewards/rejected": -0.02938532829284668, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.23067583974099554, | |
| "grad_norm": 4842418.287253727, | |
| "learning_rate": 4.2757534862798015e-07, | |
| "logits/chosen": -2.28908634185791, | |
| "logits/rejected": -2.2613823413848877, | |
| "logps/chosen": -130.56756591796875, | |
| "logps/rejected": -136.48858642578125, | |
| "loss": 129810.7, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -0.018774276599287987, | |
| "rewards/margins": 0.012544331140816212, | |
| "rewards/rejected": -0.03131860867142677, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.23472278429785512, | |
| "grad_norm": 5753286.585250832, | |
| "learning_rate": 4.2532613585245165e-07, | |
| "logits/chosen": -2.3290882110595703, | |
| "logits/rejected": -2.2913310527801514, | |
| "logps/chosen": -128.60073852539062, | |
| "logps/rejected": -144.4147491455078, | |
| "loss": 125407.5625, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -0.022057032212615013, | |
| "rewards/margins": 0.013317006640136242, | |
| "rewards/rejected": -0.03537403792142868, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.2387697288547147, | |
| "grad_norm": 6854533.186683347, | |
| "learning_rate": 4.2307692307692304e-07, | |
| "logits/chosen": -2.1821513175964355, | |
| "logits/rejected": -2.227368116378784, | |
| "logps/chosen": -132.9744873046875, | |
| "logps/rejected": -143.91380310058594, | |
| "loss": 119907.075, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -0.024536501616239548, | |
| "rewards/margins": 0.00792471133172512, | |
| "rewards/rejected": -0.03246121481060982, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.24281667341157426, | |
| "grad_norm": 7000163.800918494, | |
| "learning_rate": 4.208277103013945e-07, | |
| "logits/chosen": -2.2966506481170654, | |
| "logits/rejected": -2.274991989135742, | |
| "logps/chosen": -140.1864776611328, | |
| "logps/rejected": -142.9268798828125, | |
| "loss": 129494.7625, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.026311520487070084, | |
| "rewards/margins": 0.005165449343621731, | |
| "rewards/rejected": -0.03147696703672409, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.24686361796843384, | |
| "grad_norm": 5155538.44716785, | |
| "learning_rate": 4.1857849752586593e-07, | |
| "logits/chosen": -2.2126269340515137, | |
| "logits/rejected": -2.2339818477630615, | |
| "logps/chosen": -143.7578125, | |
| "logps/rejected": -148.81027221679688, | |
| "loss": 131088.325, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.022633636370301247, | |
| "rewards/margins": 0.005649174097925425, | |
| "rewards/rejected": -0.028282811865210533, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.2509105625252934, | |
| "grad_norm": 6494761.148749808, | |
| "learning_rate": 4.1632928475033733e-07, | |
| "logits/chosen": -2.2412619590759277, | |
| "logits/rejected": -2.215108633041382, | |
| "logps/chosen": -133.82061767578125, | |
| "logps/rejected": -144.2487030029297, | |
| "loss": 127834.35, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.023778211325407028, | |
| "rewards/margins": 0.008780455216765404, | |
| "rewards/rejected": -0.03255866840481758, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.254957507082153, | |
| "grad_norm": 6581411.527197339, | |
| "learning_rate": 4.140800719748088e-07, | |
| "logits/chosen": -2.3006882667541504, | |
| "logits/rejected": -2.279165744781494, | |
| "logps/chosen": -127.95011901855469, | |
| "logps/rejected": -144.5232696533203, | |
| "loss": 128899.5125, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.017114771530032158, | |
| "rewards/margins": 0.012585528194904327, | |
| "rewards/rejected": -0.029700294137001038, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.25900445163901253, | |
| "grad_norm": 6993144.620436077, | |
| "learning_rate": 4.118308591992802e-07, | |
| "logits/chosen": -2.288159132003784, | |
| "logits/rejected": -2.27152681350708, | |
| "logps/chosen": -116.51515197753906, | |
| "logps/rejected": -134.83572387695312, | |
| "loss": 122510.6375, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.014996351674199104, | |
| "rewards/margins": 0.018196506425738335, | |
| "rewards/rejected": -0.03319285809993744, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.2630513961958721, | |
| "grad_norm": 5352708.94864355, | |
| "learning_rate": 4.0958164642375167e-07, | |
| "logits/chosen": -2.33659029006958, | |
| "logits/rejected": -2.3185806274414062, | |
| "logps/chosen": -143.27899169921875, | |
| "logps/rejected": -154.21240234375, | |
| "loss": 128047.15, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -0.022664163261651993, | |
| "rewards/margins": 0.016272926703095436, | |
| "rewards/rejected": -0.03893708810210228, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.2670983407527317, | |
| "grad_norm": 5853928.770602061, | |
| "learning_rate": 4.073324336482231e-07, | |
| "logits/chosen": -2.2209713459014893, | |
| "logits/rejected": -2.197364091873169, | |
| "logps/chosen": -154.97152709960938, | |
| "logps/rejected": -164.9137725830078, | |
| "loss": 126285.6125, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -0.02928345464169979, | |
| "rewards/margins": 0.017678027972579002, | |
| "rewards/rejected": -0.046961478888988495, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.27114528530959126, | |
| "grad_norm": 5468563.620033422, | |
| "learning_rate": 4.0508322087269456e-07, | |
| "logits/chosen": -2.368302822113037, | |
| "logits/rejected": -2.359222888946533, | |
| "logps/chosen": -138.3487091064453, | |
| "logps/rejected": -131.19773864746094, | |
| "loss": 135010.325, | |
| "rewards/accuracies": 0.5375000238418579, | |
| "rewards/chosen": -0.022097600623965263, | |
| "rewards/margins": -0.0010355912381783128, | |
| "rewards/rejected": -0.021062009036540985, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.2751922298664508, | |
| "grad_norm": 5145007.282508669, | |
| "learning_rate": 4.02834008097166e-07, | |
| "logits/chosen": -2.2279224395751953, | |
| "logits/rejected": -2.227818250656128, | |
| "logps/chosen": -151.80599975585938, | |
| "logps/rejected": -155.39369201660156, | |
| "loss": 124851.875, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.023444540798664093, | |
| "rewards/margins": 0.006362411193549633, | |
| "rewards/rejected": -0.0298069529235363, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.2792391744233104, | |
| "grad_norm": 5800338.778716969, | |
| "learning_rate": 4.005847953216374e-07, | |
| "logits/chosen": -2.3348867893218994, | |
| "logits/rejected": -2.3266310691833496, | |
| "logps/chosen": -125.41386413574219, | |
| "logps/rejected": -131.49343872070312, | |
| "loss": 127372.8125, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.021091241389513016, | |
| "rewards/margins": 0.00724734365940094, | |
| "rewards/rejected": -0.028338585048913956, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.28328611898017, | |
| "grad_norm": 8105894.218684362, | |
| "learning_rate": 3.9833558254610884e-07, | |
| "logits/chosen": -2.309593677520752, | |
| "logits/rejected": -2.2981934547424316, | |
| "logps/chosen": -132.08596801757812, | |
| "logps/rejected": -137.70201110839844, | |
| "loss": 124781.725, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.021190345287322998, | |
| "rewards/margins": 0.0066665285266935825, | |
| "rewards/rejected": -0.027856875211000443, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.28733306353702953, | |
| "grad_norm": 5039380.169629858, | |
| "learning_rate": 3.960863697705803e-07, | |
| "logits/chosen": -2.315074920654297, | |
| "logits/rejected": -2.3194656372070312, | |
| "logps/chosen": -147.24713134765625, | |
| "logps/rejected": -158.99636840820312, | |
| "loss": 128105.9625, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -0.018308859318494797, | |
| "rewards/margins": 0.00702436501160264, | |
| "rewards/rejected": -0.0253332257270813, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.2913800080938891, | |
| "grad_norm": 6770507.385238732, | |
| "learning_rate": 3.9383715699505173e-07, | |
| "logits/chosen": -2.3582499027252197, | |
| "logits/rejected": -2.307143211364746, | |
| "logps/chosen": -141.00454711914062, | |
| "logps/rejected": -145.4442901611328, | |
| "loss": 128073.85, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -0.02170463278889656, | |
| "rewards/margins": 0.004128533415496349, | |
| "rewards/rejected": -0.025833168998360634, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.2954269526507487, | |
| "grad_norm": 6843599.0452563455, | |
| "learning_rate": 3.9158794421952313e-07, | |
| "logits/chosen": -2.2773690223693848, | |
| "logits/rejected": -2.2705273628234863, | |
| "logps/chosen": -127.78352355957031, | |
| "logps/rejected": -128.8694305419922, | |
| "loss": 133363.3375, | |
| "rewards/accuracies": 0.5375000238418579, | |
| "rewards/chosen": -0.019749607890844345, | |
| "rewards/margins": 0.0013956364709883928, | |
| "rewards/rejected": -0.02114524319767952, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.29947389720760825, | |
| "grad_norm": 6414207.014030725, | |
| "learning_rate": 3.893387314439946e-07, | |
| "logits/chosen": -2.2219457626342773, | |
| "logits/rejected": -2.1614620685577393, | |
| "logps/chosen": -138.95530700683594, | |
| "logps/rejected": -159.24916076660156, | |
| "loss": 125832.575, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -0.01657973788678646, | |
| "rewards/margins": 0.015021143481135368, | |
| "rewards/rejected": -0.03160088509321213, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.3035208417644678, | |
| "grad_norm": 6251391.785537995, | |
| "learning_rate": 3.87089518668466e-07, | |
| "logits/chosen": -2.216029167175293, | |
| "logits/rejected": -2.2095859050750732, | |
| "logps/chosen": -139.25477600097656, | |
| "logps/rejected": -146.38613891601562, | |
| "loss": 126431.6625, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.02343796379864216, | |
| "rewards/margins": 0.012653100304305553, | |
| "rewards/rejected": -0.03609105944633484, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.3075677863213274, | |
| "grad_norm": 5534957.115190962, | |
| "learning_rate": 3.8484030589293747e-07, | |
| "logits/chosen": -2.2073702812194824, | |
| "logits/rejected": -2.209057569503784, | |
| "logps/chosen": -130.53199768066406, | |
| "logps/rejected": -137.91250610351562, | |
| "loss": 127669.2, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -0.01518569327890873, | |
| "rewards/margins": 0.007037720177322626, | |
| "rewards/rejected": -0.02222341299057007, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.311614730878187, | |
| "grad_norm": 4890109.310049175, | |
| "learning_rate": 3.825910931174089e-07, | |
| "logits/chosen": -2.225956678390503, | |
| "logits/rejected": -2.210540294647217, | |
| "logps/chosen": -127.26595306396484, | |
| "logps/rejected": -133.6049041748047, | |
| "loss": 124534.6875, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -0.02195551246404648, | |
| "rewards/margins": 0.006126697175204754, | |
| "rewards/rejected": -0.02808220684528351, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.31566167543504653, | |
| "grad_norm": 6427608.185533696, | |
| "learning_rate": 3.803418803418803e-07, | |
| "logits/chosen": -2.2634310722351074, | |
| "logits/rejected": -2.245199203491211, | |
| "logps/chosen": -137.40240478515625, | |
| "logps/rejected": -143.7775115966797, | |
| "loss": 129704.1875, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": -0.022509312257170677, | |
| "rewards/margins": 0.003963272087275982, | |
| "rewards/rejected": -0.026472587138414383, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.3197086199919061, | |
| "grad_norm": 6730619.873094239, | |
| "learning_rate": 3.7809266756635175e-07, | |
| "logits/chosen": -2.1104772090911865, | |
| "logits/rejected": -2.0919671058654785, | |
| "logps/chosen": -125.5869369506836, | |
| "logps/rejected": -133.48800659179688, | |
| "loss": 125677.675, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.024079788476228714, | |
| "rewards/margins": 0.007324723992496729, | |
| "rewards/rejected": -0.0314045175909996, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.3237555645487657, | |
| "grad_norm": 6156066.531026818, | |
| "learning_rate": 3.758434547908232e-07, | |
| "logits/chosen": -2.213543176651001, | |
| "logits/rejected": -2.1960647106170654, | |
| "logps/chosen": -145.46665954589844, | |
| "logps/rejected": -159.2154541015625, | |
| "loss": 121552.525, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.021815134212374687, | |
| "rewards/margins": 0.014243106357753277, | |
| "rewards/rejected": -0.03605823963880539, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.32780250910562525, | |
| "grad_norm": 6503545.886073305, | |
| "learning_rate": 3.735942420152946e-07, | |
| "logits/chosen": -2.120095729827881, | |
| "logits/rejected": -2.0986738204956055, | |
| "logps/chosen": -134.55508422851562, | |
| "logps/rejected": -152.37815856933594, | |
| "loss": 122828.6875, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -0.025746628642082214, | |
| "rewards/margins": 0.01750759594142437, | |
| "rewards/rejected": -0.043254222720861435, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.3318494536624848, | |
| "grad_norm": 5263993.227861122, | |
| "learning_rate": 3.713450292397661e-07, | |
| "logits/chosen": -2.236570358276367, | |
| "logits/rejected": -2.216663360595703, | |
| "logps/chosen": -137.65792846679688, | |
| "logps/rejected": -137.9815673828125, | |
| "loss": 125940.1375, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -0.025968383997678757, | |
| "rewards/margins": 0.009893245995044708, | |
| "rewards/rejected": -0.03586163371801376, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.3358963982193444, | |
| "grad_norm": 5564470.498348531, | |
| "learning_rate": 3.690958164642375e-07, | |
| "logits/chosen": -2.2721188068389893, | |
| "logits/rejected": -2.2634165287017822, | |
| "logps/chosen": -146.41432189941406, | |
| "logps/rejected": -148.6261749267578, | |
| "loss": 130783.825, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": -0.028409641236066818, | |
| "rewards/margins": 0.013621616177260876, | |
| "rewards/rejected": -0.04203125834465027, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.33994334277620397, | |
| "grad_norm": 4256533.420167086, | |
| "learning_rate": 3.66846603688709e-07, | |
| "logits/chosen": -2.355905532836914, | |
| "logits/rejected": -2.3262717723846436, | |
| "logps/chosen": -135.9013671875, | |
| "logps/rejected": -144.34478759765625, | |
| "loss": 126088.525, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": -0.02681833505630493, | |
| "rewards/margins": 0.009647052735090256, | |
| "rewards/rejected": -0.03646538779139519, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.3439902873330635, | |
| "grad_norm": 6179484.090448809, | |
| "learning_rate": 3.645973909131804e-07, | |
| "logits/chosen": -2.2327308654785156, | |
| "logits/rejected": -2.194852828979492, | |
| "logps/chosen": -131.39376831054688, | |
| "logps/rejected": -155.78671264648438, | |
| "loss": 125825.075, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.02463443949818611, | |
| "rewards/margins": 0.01199124101549387, | |
| "rewards/rejected": -0.03662567585706711, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.3480372318899231, | |
| "grad_norm": 5448182.802456733, | |
| "learning_rate": 3.6234817813765177e-07, | |
| "logits/chosen": -2.2509052753448486, | |
| "logits/rejected": -2.2193102836608887, | |
| "logps/chosen": -131.55270385742188, | |
| "logps/rejected": -144.63186645507812, | |
| "loss": 130804.4625, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.019251421093940735, | |
| "rewards/margins": 0.008828094229102135, | |
| "rewards/rejected": -0.02807951346039772, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.3520841764467827, | |
| "grad_norm": 4837603.177346373, | |
| "learning_rate": 3.6009896536212327e-07, | |
| "logits/chosen": -2.433258056640625, | |
| "logits/rejected": -2.404008150100708, | |
| "logps/chosen": -135.0418243408203, | |
| "logps/rejected": -134.2267303466797, | |
| "loss": 122885.925, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": -0.017599385231733322, | |
| "rewards/margins": 0.0028229092713445425, | |
| "rewards/rejected": -0.02042229473590851, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.35613112100364225, | |
| "grad_norm": 5692190.523993364, | |
| "learning_rate": 3.5784975258659466e-07, | |
| "logits/chosen": -2.372664213180542, | |
| "logits/rejected": -2.4038546085357666, | |
| "logps/chosen": -145.62425231933594, | |
| "logps/rejected": -161.4815216064453, | |
| "loss": 125917.475, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.021967049688100815, | |
| "rewards/margins": 0.006153530441224575, | |
| "rewards/rejected": -0.028120581060647964, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.3601780655605018, | |
| "grad_norm": 5143465.092976311, | |
| "learning_rate": 3.5560053981106616e-07, | |
| "logits/chosen": -2.4298062324523926, | |
| "logits/rejected": -2.441378116607666, | |
| "logps/chosen": -114.91922760009766, | |
| "logps/rejected": -128.6441192626953, | |
| "loss": 125689.425, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.02063891664147377, | |
| "rewards/margins": 0.0056837559677660465, | |
| "rewards/rejected": -0.02632267400622368, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.3642250101173614, | |
| "grad_norm": 7348686.998660731, | |
| "learning_rate": 3.5335132703553755e-07, | |
| "logits/chosen": -2.3470609188079834, | |
| "logits/rejected": -2.338306427001953, | |
| "logps/chosen": -142.22169494628906, | |
| "logps/rejected": -155.0847625732422, | |
| "loss": 127013.675, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": -0.021488003432750702, | |
| "rewards/margins": 0.00833697896450758, | |
| "rewards/rejected": -0.029824981465935707, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.36827195467422097, | |
| "grad_norm": 5455491.748505866, | |
| "learning_rate": 3.5110211426000895e-07, | |
| "logits/chosen": -2.328141689300537, | |
| "logits/rejected": -2.300947666168213, | |
| "logps/chosen": -144.3502655029297, | |
| "logps/rejected": -160.5428466796875, | |
| "loss": 132699.2, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.019702186807990074, | |
| "rewards/margins": 0.012580236420035362, | |
| "rewards/rejected": -0.032282426953315735, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.3723188992310805, | |
| "grad_norm": 5918454.321642784, | |
| "learning_rate": 3.4885290148448044e-07, | |
| "logits/chosen": -2.2618203163146973, | |
| "logits/rejected": -2.273591995239258, | |
| "logps/chosen": -140.4850616455078, | |
| "logps/rejected": -144.3483123779297, | |
| "loss": 126713.925, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -0.02276991680264473, | |
| "rewards/margins": 0.008334951475262642, | |
| "rewards/rejected": -0.03110486827790737, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.3763658437879401, | |
| "grad_norm": 7045240.388394526, | |
| "learning_rate": 3.4660368870895184e-07, | |
| "logits/chosen": -2.3371381759643555, | |
| "logits/rejected": -2.3148632049560547, | |
| "logps/chosen": -141.95742797851562, | |
| "logps/rejected": -160.81312561035156, | |
| "loss": 124856.1375, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -0.02805008552968502, | |
| "rewards/margins": 0.014153921976685524, | |
| "rewards/rejected": -0.042204007506370544, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.3804127883447997, | |
| "grad_norm": 5526632.5094240755, | |
| "learning_rate": 3.443544759334233e-07, | |
| "logits/chosen": -2.3289644718170166, | |
| "logits/rejected": -2.3078227043151855, | |
| "logps/chosen": -151.3744354248047, | |
| "logps/rejected": -153.30511474609375, | |
| "loss": 126556.475, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": -0.02584829553961754, | |
| "rewards/margins": 0.005777581594884396, | |
| "rewards/rejected": -0.03162587806582451, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.38445973290165925, | |
| "grad_norm": 6075148.892704811, | |
| "learning_rate": 3.4210526315789473e-07, | |
| "logits/chosen": -2.2021899223327637, | |
| "logits/rejected": -2.199693441390991, | |
| "logps/chosen": -126.38993072509766, | |
| "logps/rejected": -135.55516052246094, | |
| "loss": 130061.4375, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": -0.02466515824198723, | |
| "rewards/margins": 0.0069196284748613834, | |
| "rewards/rejected": -0.03158479183912277, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.3885066774585188, | |
| "grad_norm": 5994722.402682892, | |
| "learning_rate": 3.398560503823661e-07, | |
| "logits/chosen": -2.375749349594116, | |
| "logits/rejected": -2.350696086883545, | |
| "logps/chosen": -135.23800659179688, | |
| "logps/rejected": -143.55755615234375, | |
| "loss": 130424.2625, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.020815346390008926, | |
| "rewards/margins": 0.009448934346437454, | |
| "rewards/rejected": -0.03026428259909153, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.3925536220153784, | |
| "grad_norm": 6955347.020210707, | |
| "learning_rate": 3.376068376068376e-07, | |
| "logits/chosen": -2.41917085647583, | |
| "logits/rejected": -2.3552684783935547, | |
| "logps/chosen": -133.61973571777344, | |
| "logps/rejected": -150.07192993164062, | |
| "loss": 126116.675, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.023608971387147903, | |
| "rewards/margins": 0.013977563008666039, | |
| "rewards/rejected": -0.03758653253316879, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.39660056657223797, | |
| "grad_norm": 7026469.492711891, | |
| "learning_rate": 3.35357624831309e-07, | |
| "logits/chosen": -2.472712993621826, | |
| "logits/rejected": -2.4403810501098633, | |
| "logps/chosen": -144.36697387695312, | |
| "logps/rejected": -160.89016723632812, | |
| "loss": 125593.175, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.0269068144261837, | |
| "rewards/margins": 0.013755050487816334, | |
| "rewards/rejected": -0.04066186398267746, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.4006475111290975, | |
| "grad_norm": 5279874.300128242, | |
| "learning_rate": 3.3310841205578046e-07, | |
| "logits/chosen": -2.3706583976745605, | |
| "logits/rejected": -2.362694263458252, | |
| "logps/chosen": -130.1677703857422, | |
| "logps/rejected": -150.29214477539062, | |
| "loss": 122425.7125, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.02780333161354065, | |
| "rewards/margins": 0.010212745517492294, | |
| "rewards/rejected": -0.038016077131032944, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.4046944556859571, | |
| "grad_norm": 7346421.9033947745, | |
| "learning_rate": 3.308591992802519e-07, | |
| "logits/chosen": -2.3910489082336426, | |
| "logits/rejected": -2.360917091369629, | |
| "logps/chosen": -134.7192840576172, | |
| "logps/rejected": -145.80410766601562, | |
| "loss": 120740.6875, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.02175028808414936, | |
| "rewards/margins": 0.012434590607881546, | |
| "rewards/rejected": -0.03418487682938576, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.4087414002428167, | |
| "grad_norm": 6266220.786790677, | |
| "learning_rate": 3.286099865047233e-07, | |
| "logits/chosen": -2.258577585220337, | |
| "logits/rejected": -2.278409957885742, | |
| "logps/chosen": -134.9305877685547, | |
| "logps/rejected": -154.0025177001953, | |
| "loss": 127529.4875, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -0.020327283069491386, | |
| "rewards/margins": 0.010549711063504219, | |
| "rewards/rejected": -0.030876994132995605, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 0.41278834479967624, | |
| "grad_norm": 6569607.008702802, | |
| "learning_rate": 3.263607737291948e-07, | |
| "logits/chosen": -2.2700555324554443, | |
| "logits/rejected": -2.2413737773895264, | |
| "logps/chosen": -145.36404418945312, | |
| "logps/rejected": -159.6646270751953, | |
| "loss": 129882.5125, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -0.020192014053463936, | |
| "rewards/margins": 0.008731147274374962, | |
| "rewards/rejected": -0.0289231576025486, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 0.4168352893565358, | |
| "grad_norm": 6604946.805112461, | |
| "learning_rate": 3.241115609536662e-07, | |
| "logits/chosen": -2.2907137870788574, | |
| "logits/rejected": -2.2590463161468506, | |
| "logps/chosen": -148.82757568359375, | |
| "logps/rejected": -158.55458068847656, | |
| "loss": 123561.0125, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.02791331335902214, | |
| "rewards/margins": 0.01067260093986988, | |
| "rewards/rejected": -0.03858591616153717, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 0.4208822339133954, | |
| "grad_norm": 7819744.1215137215, | |
| "learning_rate": 3.2186234817813764e-07, | |
| "logits/chosen": -2.3501906394958496, | |
| "logits/rejected": -2.382286548614502, | |
| "logps/chosen": -145.73556518554688, | |
| "logps/rejected": -145.95733642578125, | |
| "loss": 125984.175, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.024780739098787308, | |
| "rewards/margins": 0.01095888763666153, | |
| "rewards/rejected": -0.03573962673544884, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 0.42492917847025496, | |
| "grad_norm": 8329276.874833419, | |
| "learning_rate": 3.196131354026091e-07, | |
| "logits/chosen": -2.3430895805358887, | |
| "logits/rejected": -2.2940633296966553, | |
| "logps/chosen": -156.47262573242188, | |
| "logps/rejected": -172.1248321533203, | |
| "loss": 127542.5875, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/chosen": -0.026906628161668777, | |
| "rewards/margins": 0.02299944870173931, | |
| "rewards/rejected": -0.04990607872605324, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.4289761230271145, | |
| "grad_norm": 5411023.24022027, | |
| "learning_rate": 3.1736392262708053e-07, | |
| "logits/chosen": -2.350010633468628, | |
| "logits/rejected": -2.348132610321045, | |
| "logps/chosen": -134.76171875, | |
| "logps/rejected": -165.0662384033203, | |
| "loss": 124288.825, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": -0.026082511991262436, | |
| "rewards/margins": 0.02408730424940586, | |
| "rewards/rejected": -0.05016981437802315, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 0.4330230675839741, | |
| "grad_norm": 6438956.528553201, | |
| "learning_rate": 3.151147098515519e-07, | |
| "logits/chosen": -2.403751850128174, | |
| "logits/rejected": -2.391162395477295, | |
| "logps/chosen": -133.4490966796875, | |
| "logps/rejected": -145.66363525390625, | |
| "loss": 122699.675, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.023318186402320862, | |
| "rewards/margins": 0.010946491733193398, | |
| "rewards/rejected": -0.03426467627286911, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 0.4370700121408337, | |
| "grad_norm": 5922234.372544115, | |
| "learning_rate": 3.1286549707602337e-07, | |
| "logits/chosen": -2.2476916313171387, | |
| "logits/rejected": -2.2207980155944824, | |
| "logps/chosen": -142.42433166503906, | |
| "logps/rejected": -152.08865356445312, | |
| "loss": 123837.95, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.028287163004279137, | |
| "rewards/margins": 0.015818050131201744, | |
| "rewards/rejected": -0.04410521313548088, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 0.44111695669769324, | |
| "grad_norm": 5883039.362660401, | |
| "learning_rate": 3.106162843004948e-07, | |
| "logits/chosen": -2.3883793354034424, | |
| "logits/rejected": -2.3448328971862793, | |
| "logps/chosen": -135.72998046875, | |
| "logps/rejected": -153.7415008544922, | |
| "loss": 124484.025, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.026858652010560036, | |
| "rewards/margins": 0.02032136358320713, | |
| "rewards/rejected": -0.04718000814318657, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 0.4451639012545528, | |
| "grad_norm": 7158679.357876272, | |
| "learning_rate": 3.0836707152496626e-07, | |
| "logits/chosen": -2.3638851642608643, | |
| "logits/rejected": -2.3225362300872803, | |
| "logps/chosen": -145.95896911621094, | |
| "logps/rejected": -169.71176147460938, | |
| "loss": 130674.3625, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": -0.025300731882452965, | |
| "rewards/margins": 0.016643613576889038, | |
| "rewards/rejected": -0.04194434732198715, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.4492108458114124, | |
| "grad_norm": 7065276.414180166, | |
| "learning_rate": 3.061178587494377e-07, | |
| "logits/chosen": -2.3456435203552246, | |
| "logits/rejected": -2.3152847290039062, | |
| "logps/chosen": -126.73854064941406, | |
| "logps/rejected": -143.69662475585938, | |
| "loss": 127769.775, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -0.01884400099515915, | |
| "rewards/margins": 0.015868009999394417, | |
| "rewards/rejected": -0.03471200913190842, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 0.45325779036827196, | |
| "grad_norm": 8872861.336008936, | |
| "learning_rate": 3.038686459739091e-07, | |
| "logits/chosen": -2.3893070220947266, | |
| "logits/rejected": -2.379615068435669, | |
| "logps/chosen": -135.2264404296875, | |
| "logps/rejected": -147.5668487548828, | |
| "loss": 121978.65, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.025070184841752052, | |
| "rewards/margins": 0.01242685504257679, | |
| "rewards/rejected": -0.037497036159038544, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 0.4573047349251315, | |
| "grad_norm": 4362461.84620477, | |
| "learning_rate": 3.0161943319838055e-07, | |
| "logits/chosen": -2.3373289108276367, | |
| "logits/rejected": -2.3283610343933105, | |
| "logps/chosen": -113.62736511230469, | |
| "logps/rejected": -132.5222930908203, | |
| "loss": 122763.6, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.027866637334227562, | |
| "rewards/margins": 0.010607337579131126, | |
| "rewards/rejected": -0.03847397491335869, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 0.4613516794819911, | |
| "grad_norm": 6441902.5854437305, | |
| "learning_rate": 2.99370220422852e-07, | |
| "logits/chosen": -2.4195449352264404, | |
| "logits/rejected": -2.421095848083496, | |
| "logps/chosen": -138.25782775878906, | |
| "logps/rejected": -152.2244110107422, | |
| "loss": 128506.9875, | |
| "rewards/accuracies": 0.5249999761581421, | |
| "rewards/chosen": -0.01849151961505413, | |
| "rewards/margins": 0.007775165140628815, | |
| "rewards/rejected": -0.026266688480973244, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 0.4653986240388507, | |
| "grad_norm": 7047614.500405596, | |
| "learning_rate": 2.971210076473234e-07, | |
| "logits/chosen": -2.4957115650177, | |
| "logits/rejected": -2.4509148597717285, | |
| "logps/chosen": -137.063720703125, | |
| "logps/rejected": -144.99172973632812, | |
| "loss": 121503.125, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.018852662295103073, | |
| "rewards/margins": 0.013833269476890564, | |
| "rewards/rejected": -0.03268593177199364, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.46944556859571024, | |
| "grad_norm": 6473613.10465131, | |
| "learning_rate": 2.948717948717949e-07, | |
| "logits/chosen": -2.508885145187378, | |
| "logits/rejected": -2.4511702060699463, | |
| "logps/chosen": -144.10775756835938, | |
| "logps/rejected": -154.82540893554688, | |
| "loss": 129245.7125, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.025161290541291237, | |
| "rewards/margins": 0.008113402873277664, | |
| "rewards/rejected": -0.03327469527721405, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 0.4734925131525698, | |
| "grad_norm": 6225189.741747939, | |
| "learning_rate": 2.926225820962663e-07, | |
| "logits/chosen": -2.5162465572357178, | |
| "logits/rejected": -2.526261568069458, | |
| "logps/chosen": -134.27059936523438, | |
| "logps/rejected": -153.3767852783203, | |
| "loss": 129228.5, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.020399171859025955, | |
| "rewards/margins": 0.01304579060524702, | |
| "rewards/rejected": -0.03344495967030525, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 0.4775394577094294, | |
| "grad_norm": 6686211.632899741, | |
| "learning_rate": 2.903733693207377e-07, | |
| "logits/chosen": -2.500845432281494, | |
| "logits/rejected": -2.4776079654693604, | |
| "logps/chosen": -139.28172302246094, | |
| "logps/rejected": -162.630126953125, | |
| "loss": 127296.0, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.021778758615255356, | |
| "rewards/margins": 0.016411086544394493, | |
| "rewards/rejected": -0.038189847022295, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 0.48158640226628896, | |
| "grad_norm": 7327493.262833852, | |
| "learning_rate": 2.8812415654520917e-07, | |
| "logits/chosen": -2.5000369548797607, | |
| "logits/rejected": -2.4850993156433105, | |
| "logps/chosen": -133.94406127929688, | |
| "logps/rejected": -149.57473754882812, | |
| "loss": 130669.6, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -0.02554786205291748, | |
| "rewards/margins": 0.0159430094063282, | |
| "rewards/rejected": -0.04149087145924568, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 0.4856333468231485, | |
| "grad_norm": 8827001.924529044, | |
| "learning_rate": 2.8587494376968056e-07, | |
| "logits/chosen": -2.411595582962036, | |
| "logits/rejected": -2.4169204235076904, | |
| "logps/chosen": -130.80325317382812, | |
| "logps/rejected": -142.50013732910156, | |
| "loss": 121415.625, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.019139764830470085, | |
| "rewards/margins": 0.018100781366229057, | |
| "rewards/rejected": -0.03724054619669914, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.48968029138000807, | |
| "grad_norm": 5557571.235890019, | |
| "learning_rate": 2.8362573099415206e-07, | |
| "logits/chosen": -2.538846015930176, | |
| "logits/rejected": -2.502953052520752, | |
| "logps/chosen": -134.97543334960938, | |
| "logps/rejected": -142.17556762695312, | |
| "loss": 118867.85, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.013044399209320545, | |
| "rewards/margins": 0.011013238690793514, | |
| "rewards/rejected": -0.02405763790011406, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 0.4937272359368677, | |
| "grad_norm": 6914411.411931411, | |
| "learning_rate": 2.8137651821862346e-07, | |
| "logits/chosen": -2.389519691467285, | |
| "logits/rejected": -2.3551812171936035, | |
| "logps/chosen": -138.05276489257812, | |
| "logps/rejected": -160.35946655273438, | |
| "loss": 127666.15, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": -0.025238817557692528, | |
| "rewards/margins": 0.02308265119791031, | |
| "rewards/rejected": -0.048321474343538284, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 0.49777418049372724, | |
| "grad_norm": 5609340.931669485, | |
| "learning_rate": 2.7912730544309496e-07, | |
| "logits/chosen": -2.4456872940063477, | |
| "logits/rejected": -2.410588026046753, | |
| "logps/chosen": -152.31521606445312, | |
| "logps/rejected": -166.41482543945312, | |
| "loss": 126962.65, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": -0.029917621985077858, | |
| "rewards/margins": 0.013386559672653675, | |
| "rewards/rejected": -0.04330417513847351, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 0.5018211250505868, | |
| "grad_norm": 5540227.088123434, | |
| "learning_rate": 2.7687809266756635e-07, | |
| "logits/chosen": -2.3677735328674316, | |
| "logits/rejected": -2.354952335357666, | |
| "logps/chosen": -126.54608154296875, | |
| "logps/rejected": -145.7926483154297, | |
| "loss": 127253.85, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.018986444920301437, | |
| "rewards/margins": 0.016626928001642227, | |
| "rewards/rejected": -0.035613369196653366, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 0.5058680696074463, | |
| "grad_norm": 8562833.744693786, | |
| "learning_rate": 2.7462887989203774e-07, | |
| "logits/chosen": -2.344916820526123, | |
| "logits/rejected": -2.312051296234131, | |
| "logps/chosen": -138.89639282226562, | |
| "logps/rejected": -144.3848876953125, | |
| "loss": 134452.375, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": -0.02468470111489296, | |
| "rewards/margins": 0.008161008358001709, | |
| "rewards/rejected": -0.03284571319818497, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.509915014164306, | |
| "grad_norm": 5502511.320429619, | |
| "learning_rate": 2.7237966711650924e-07, | |
| "logits/chosen": -2.283324718475342, | |
| "logits/rejected": -2.2585034370422363, | |
| "logps/chosen": -142.3020782470703, | |
| "logps/rejected": -157.03909301757812, | |
| "loss": 126080.5375, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.02461249753832817, | |
| "rewards/margins": 0.02032613940536976, | |
| "rewards/rejected": -0.04493863508105278, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 0.5139619587211656, | |
| "grad_norm": 8682050.76660753, | |
| "learning_rate": 2.7013045434098063e-07, | |
| "logits/chosen": -2.275059223175049, | |
| "logits/rejected": -2.2415084838867188, | |
| "logps/chosen": -140.72640991210938, | |
| "logps/rejected": -159.70445251464844, | |
| "loss": 128182.575, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.033078595995903015, | |
| "rewards/margins": 0.01954609341919422, | |
| "rewards/rejected": -0.052624695003032684, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 0.5180089032780251, | |
| "grad_norm": 9710402.98065158, | |
| "learning_rate": 2.678812415654521e-07, | |
| "logits/chosen": -2.2827441692352295, | |
| "logits/rejected": -2.2365641593933105, | |
| "logps/chosen": -159.8201446533203, | |
| "logps/rejected": -167.04348754882812, | |
| "loss": 126161.6, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.0369146391749382, | |
| "rewards/margins": 0.009988631121814251, | |
| "rewards/rejected": -0.04690327122807503, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 0.5220558478348847, | |
| "grad_norm": 5687965.997013683, | |
| "learning_rate": 2.656320287899235e-07, | |
| "logits/chosen": -2.440713405609131, | |
| "logits/rejected": -2.420994281768799, | |
| "logps/chosen": -141.1729278564453, | |
| "logps/rejected": -146.4890899658203, | |
| "loss": 120775.6875, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -0.02869614027440548, | |
| "rewards/margins": 0.01065666601061821, | |
| "rewards/rejected": -0.03935280814766884, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 0.5261027923917442, | |
| "grad_norm": 6948542.641971354, | |
| "learning_rate": 2.633828160143949e-07, | |
| "logits/chosen": -2.462017774581909, | |
| "logits/rejected": -2.479309558868408, | |
| "logps/chosen": -148.1350860595703, | |
| "logps/rejected": -158.99916076660156, | |
| "loss": 127668.2, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.020126869902014732, | |
| "rewards/margins": 0.011567593552172184, | |
| "rewards/rejected": -0.03169446438550949, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.5301497369486038, | |
| "grad_norm": 8950839.210890554, | |
| "learning_rate": 2.611336032388664e-07, | |
| "logits/chosen": -2.379216432571411, | |
| "logits/rejected": -2.349857807159424, | |
| "logps/chosen": -158.7649688720703, | |
| "logps/rejected": -158.4715118408203, | |
| "loss": 133855.0375, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -0.030759122222661972, | |
| "rewards/margins": 0.004436601884663105, | |
| "rewards/rejected": -0.0351957306265831, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 0.5341966815054634, | |
| "grad_norm": 5080965.438845941, | |
| "learning_rate": 2.588843904633378e-07, | |
| "logits/chosen": -2.4408226013183594, | |
| "logits/rejected": -2.4230995178222656, | |
| "logps/chosen": -122.5213394165039, | |
| "logps/rejected": -136.46041870117188, | |
| "loss": 125633.5875, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.022939473390579224, | |
| "rewards/margins": 0.01324677187949419, | |
| "rewards/rejected": -0.03618624433875084, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 0.5382436260623229, | |
| "grad_norm": 5182504.597846507, | |
| "learning_rate": 2.5663517768780926e-07, | |
| "logits/chosen": -2.505174160003662, | |
| "logits/rejected": -2.483182191848755, | |
| "logps/chosen": -142.41513061523438, | |
| "logps/rejected": -153.15907287597656, | |
| "loss": 123496.5125, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.027721602469682693, | |
| "rewards/margins": 0.015740955248475075, | |
| "rewards/rejected": -0.04346255585551262, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 0.5422905706191825, | |
| "grad_norm": 7128845.279886402, | |
| "learning_rate": 2.543859649122807e-07, | |
| "logits/chosen": -2.478231430053711, | |
| "logits/rejected": -2.448133945465088, | |
| "logps/chosen": -135.66380310058594, | |
| "logps/rejected": -153.63766479492188, | |
| "loss": 126797.3625, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.02139298990368843, | |
| "rewards/margins": 0.010669348761439323, | |
| "rewards/rejected": -0.0320623405277729, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 0.5463375151760421, | |
| "grad_norm": 6676875.1940184785, | |
| "learning_rate": 2.521367521367521e-07, | |
| "logits/chosen": -2.4742424488067627, | |
| "logits/rejected": -2.4604861736297607, | |
| "logps/chosen": -117.7722396850586, | |
| "logps/rejected": -130.4296875, | |
| "loss": 125562.3, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.01471949927508831, | |
| "rewards/margins": 0.010869570076465607, | |
| "rewards/rejected": -0.025589067488908768, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.5503844597329016, | |
| "grad_norm": 5238344.574016525, | |
| "learning_rate": 2.4988753936122354e-07, | |
| "logits/chosen": -2.404810905456543, | |
| "logits/rejected": -2.386918544769287, | |
| "logps/chosen": -129.4068603515625, | |
| "logps/rejected": -142.20303344726562, | |
| "loss": 121443.45, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.014744667336344719, | |
| "rewards/margins": 0.01344493217766285, | |
| "rewards/rejected": -0.02818959951400757, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 0.5544314042897612, | |
| "grad_norm": 6640943.9300566595, | |
| "learning_rate": 2.47638326585695e-07, | |
| "logits/chosen": -2.3393630981445312, | |
| "logits/rejected": -2.3323869705200195, | |
| "logps/chosen": -132.86630249023438, | |
| "logps/rejected": -144.26113891601562, | |
| "loss": 127953.15, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.026765987277030945, | |
| "rewards/margins": 0.01323648076504469, | |
| "rewards/rejected": -0.04000247269868851, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 0.5584783488466208, | |
| "grad_norm": 11500911.034452418, | |
| "learning_rate": 2.4538911381016643e-07, | |
| "logits/chosen": -2.2898106575012207, | |
| "logits/rejected": -2.346161127090454, | |
| "logps/chosen": -147.8282928466797, | |
| "logps/rejected": -163.23367309570312, | |
| "loss": 116464.0875, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -0.018574833869934082, | |
| "rewards/margins": 0.024278491735458374, | |
| "rewards/rejected": -0.042853325605392456, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 0.5625252934034803, | |
| "grad_norm": 5617383.1061038, | |
| "learning_rate": 2.431399010346379e-07, | |
| "logits/chosen": -2.4383928775787354, | |
| "logits/rejected": -2.447169780731201, | |
| "logps/chosen": -125.36312103271484, | |
| "logps/rejected": -135.82142639160156, | |
| "loss": 127916.95, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -0.018465066328644753, | |
| "rewards/margins": 0.007541149854660034, | |
| "rewards/rejected": -0.026006218045949936, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 0.56657223796034, | |
| "grad_norm": 7455055.133065385, | |
| "learning_rate": 2.408906882591093e-07, | |
| "logits/chosen": -2.351076126098633, | |
| "logits/rejected": -2.386265993118286, | |
| "logps/chosen": -138.0536346435547, | |
| "logps/rejected": -153.21102905273438, | |
| "loss": 126554.1, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -0.02320500835776329, | |
| "rewards/margins": 0.008944050408899784, | |
| "rewards/rejected": -0.0321490578353405, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.5706191825171996, | |
| "grad_norm": 5233092.811472115, | |
| "learning_rate": 2.386414754835807e-07, | |
| "logits/chosen": -2.378037929534912, | |
| "logits/rejected": -2.3643617630004883, | |
| "logps/chosen": -160.79556274414062, | |
| "logps/rejected": -167.969970703125, | |
| "loss": 121087.4625, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.021083252504467964, | |
| "rewards/margins": 0.008589145727455616, | |
| "rewards/rejected": -0.029672399163246155, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 0.5746661270740591, | |
| "grad_norm": 8648367.034730982, | |
| "learning_rate": 2.363922627080522e-07, | |
| "logits/chosen": -2.4605114459991455, | |
| "logits/rejected": -2.432900905609131, | |
| "logps/chosen": -145.24966430664062, | |
| "logps/rejected": -169.27865600585938, | |
| "loss": 127293.625, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -0.028744569048285484, | |
| "rewards/margins": 0.01984976790845394, | |
| "rewards/rejected": -0.04859434440732002, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 0.5787130716309187, | |
| "grad_norm": 6917627.189571037, | |
| "learning_rate": 2.3414304993252359e-07, | |
| "logits/chosen": -2.415008783340454, | |
| "logits/rejected": -2.390291213989258, | |
| "logps/chosen": -118.60847473144531, | |
| "logps/rejected": -137.45223999023438, | |
| "loss": 126428.6625, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.021594971418380737, | |
| "rewards/margins": 0.010223200544714928, | |
| "rewards/rejected": -0.031818170100450516, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 0.5827600161877782, | |
| "grad_norm": 7112073.222735109, | |
| "learning_rate": 2.3189383715699503e-07, | |
| "logits/chosen": -2.361323595046997, | |
| "logits/rejected": -2.359731674194336, | |
| "logps/chosen": -136.58473205566406, | |
| "logps/rejected": -162.69363403320312, | |
| "loss": 126602.9125, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": -0.02461722306907177, | |
| "rewards/margins": 0.010918731801211834, | |
| "rewards/rejected": -0.03553595766425133, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 0.5868069607446378, | |
| "grad_norm": 5694863.468784067, | |
| "learning_rate": 2.2964462438146648e-07, | |
| "logits/chosen": -2.4715747833251953, | |
| "logits/rejected": -2.4460928440093994, | |
| "logps/chosen": -139.74227905273438, | |
| "logps/rejected": -143.89366149902344, | |
| "loss": 124850.7625, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.019559044390916824, | |
| "rewards/margins": 0.007027704268693924, | |
| "rewards/rejected": -0.0265867467969656, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 0.5908539053014974, | |
| "grad_norm": 6596951.588588771, | |
| "learning_rate": 2.2739541160593792e-07, | |
| "logits/chosen": -2.405214786529541, | |
| "logits/rejected": -2.376192569732666, | |
| "logps/chosen": -132.67037963867188, | |
| "logps/rejected": -152.36544799804688, | |
| "loss": 129629.9375, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.019801389425992966, | |
| "rewards/margins": 0.02020254358649254, | |
| "rewards/rejected": -0.040003933012485504, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 0.5949008498583569, | |
| "grad_norm": 8604502.436566744, | |
| "learning_rate": 2.2514619883040934e-07, | |
| "logits/chosen": -2.4290502071380615, | |
| "logits/rejected": -2.4105029106140137, | |
| "logps/chosen": -138.2572784423828, | |
| "logps/rejected": -157.66226196289062, | |
| "loss": 126705.45, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.023596590384840965, | |
| "rewards/margins": 0.01850738190114498, | |
| "rewards/rejected": -0.042103976011276245, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 0.5989477944152165, | |
| "grad_norm": 6476200.112160567, | |
| "learning_rate": 2.2289698605488076e-07, | |
| "logits/chosen": -2.405041217803955, | |
| "logits/rejected": -2.343621253967285, | |
| "logps/chosen": -135.63980102539062, | |
| "logps/rejected": -157.7008056640625, | |
| "loss": 124231.4625, | |
| "rewards/accuracies": 0.737500011920929, | |
| "rewards/chosen": -0.025133201852440834, | |
| "rewards/margins": 0.02330349013209343, | |
| "rewards/rejected": -0.048436690121889114, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 0.6029947389720761, | |
| "grad_norm": 6131917.113665815, | |
| "learning_rate": 2.206477732793522e-07, | |
| "logits/chosen": -2.413145065307617, | |
| "logits/rejected": -2.4113070964813232, | |
| "logps/chosen": -131.548583984375, | |
| "logps/rejected": -143.9263153076172, | |
| "loss": 123647.9125, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.024967512115836143, | |
| "rewards/margins": 0.014087630435824394, | |
| "rewards/rejected": -0.03905514255166054, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 0.6070416835289356, | |
| "grad_norm": 6175564.2334703235, | |
| "learning_rate": 2.1839856050382366e-07, | |
| "logits/chosen": -2.4009850025177, | |
| "logits/rejected": -2.3908042907714844, | |
| "logps/chosen": -138.0840606689453, | |
| "logps/rejected": -150.7329864501953, | |
| "loss": 128591.4875, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": -0.02667851746082306, | |
| "rewards/margins": 0.0038915693294256926, | |
| "rewards/rejected": -0.03057008981704712, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.6110886280857952, | |
| "grad_norm": 7212353.914117165, | |
| "learning_rate": 2.161493477282951e-07, | |
| "logits/chosen": -2.405449628829956, | |
| "logits/rejected": -2.3872292041778564, | |
| "logps/chosen": -123.17295837402344, | |
| "logps/rejected": -143.22288513183594, | |
| "loss": 130148.7, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": -0.01992269791662693, | |
| "rewards/margins": 0.015165319666266441, | |
| "rewards/rejected": -0.03508801758289337, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 0.6151355726426548, | |
| "grad_norm": 9046114.113444956, | |
| "learning_rate": 2.1390013495276652e-07, | |
| "logits/chosen": -2.405348777770996, | |
| "logits/rejected": -2.4298527240753174, | |
| "logps/chosen": -147.79513549804688, | |
| "logps/rejected": -168.75872802734375, | |
| "loss": 127527.1, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.026391273364424706, | |
| "rewards/margins": 0.009268445894122124, | |
| "rewards/rejected": -0.03565971553325653, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 0.6191825171995143, | |
| "grad_norm": 6130066.161443972, | |
| "learning_rate": 2.1165092217723797e-07, | |
| "logits/chosen": -2.3494181632995605, | |
| "logits/rejected": -2.3200573921203613, | |
| "logps/chosen": -131.47329711914062, | |
| "logps/rejected": -151.9276885986328, | |
| "loss": 121959.575, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.02297963574528694, | |
| "rewards/margins": 0.016111956909298897, | |
| "rewards/rejected": -0.03909159451723099, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 0.623229461756374, | |
| "grad_norm": 6650864.453901279, | |
| "learning_rate": 2.0940170940170939e-07, | |
| "logits/chosen": -2.3809990882873535, | |
| "logits/rejected": -2.3723671436309814, | |
| "logps/chosen": -156.38284301757812, | |
| "logps/rejected": -171.9077606201172, | |
| "loss": 122674.425, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.02390839345753193, | |
| "rewards/margins": 0.019179565832018852, | |
| "rewards/rejected": -0.04308795928955078, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 0.6272764063132336, | |
| "grad_norm": 5183894.402422156, | |
| "learning_rate": 2.0715249662618083e-07, | |
| "logits/chosen": -2.4737048149108887, | |
| "logits/rejected": -2.446381092071533, | |
| "logps/chosen": -145.76119995117188, | |
| "logps/rejected": -165.5288848876953, | |
| "loss": 125087.2875, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": -0.02032746747136116, | |
| "rewards/margins": 0.009251989424228668, | |
| "rewards/rejected": -0.029579460620880127, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 0.6313233508700931, | |
| "grad_norm": 6969624.578927646, | |
| "learning_rate": 2.0490328385065225e-07, | |
| "logits/chosen": -2.405435800552368, | |
| "logits/rejected": -2.4140048027038574, | |
| "logps/chosen": -119.48077392578125, | |
| "logps/rejected": -130.77532958984375, | |
| "loss": 125878.4625, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -0.018460571765899658, | |
| "rewards/margins": 0.010110612958669662, | |
| "rewards/rejected": -0.02857118286192417, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 0.6353702954269527, | |
| "grad_norm": 5800628.746003852, | |
| "learning_rate": 2.026540710751237e-07, | |
| "logits/chosen": -2.366516351699829, | |
| "logits/rejected": -2.3698983192443848, | |
| "logps/chosen": -147.12881469726562, | |
| "logps/rejected": -146.3048553466797, | |
| "loss": 129275.3375, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.01768730953335762, | |
| "rewards/margins": 0.011509931646287441, | |
| "rewards/rejected": -0.029197242110967636, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 0.6394172399838122, | |
| "grad_norm": 14918875.434130527, | |
| "learning_rate": 2.0040485829959514e-07, | |
| "logits/chosen": -2.4734253883361816, | |
| "logits/rejected": -2.4595344066619873, | |
| "logps/chosen": -125.6633529663086, | |
| "logps/rejected": -142.96157836914062, | |
| "loss": 123910.425, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.018165288493037224, | |
| "rewards/margins": 0.015782013535499573, | |
| "rewards/rejected": -0.033947303891181946, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 0.6434641845406718, | |
| "grad_norm": 5025901.152056148, | |
| "learning_rate": 1.981556455240666e-07, | |
| "logits/chosen": -2.4370574951171875, | |
| "logits/rejected": -2.4227161407470703, | |
| "logps/chosen": -141.5856475830078, | |
| "logps/rejected": -164.05941772460938, | |
| "loss": 128733.9125, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.021845245733857155, | |
| "rewards/margins": 0.011800579726696014, | |
| "rewards/rejected": -0.03364582732319832, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 0.6475111290975314, | |
| "grad_norm": 5965330.851620259, | |
| "learning_rate": 1.9590643274853798e-07, | |
| "logits/chosen": -2.398038387298584, | |
| "logits/rejected": -2.37715482711792, | |
| "logps/chosen": -118.133544921875, | |
| "logps/rejected": -130.4759063720703, | |
| "loss": 123004.3375, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.01905783638358116, | |
| "rewards/margins": 0.011883154511451721, | |
| "rewards/rejected": -0.030940990895032883, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.6515580736543909, | |
| "grad_norm": 6299921.629356138, | |
| "learning_rate": 1.9365721997300943e-07, | |
| "logits/chosen": -2.3423843383789062, | |
| "logits/rejected": -2.2989087104797363, | |
| "logps/chosen": -115.65093994140625, | |
| "logps/rejected": -138.21340942382812, | |
| "loss": 127277.8375, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.02753433585166931, | |
| "rewards/margins": 0.014595555141568184, | |
| "rewards/rejected": -0.042129892855882645, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 0.6556050182112505, | |
| "grad_norm": 7576070.098325265, | |
| "learning_rate": 1.9140800719748088e-07, | |
| "logits/chosen": -2.320422649383545, | |
| "logits/rejected": -2.290821075439453, | |
| "logps/chosen": -117.01118469238281, | |
| "logps/rejected": -125.79508972167969, | |
| "loss": 124367.8875, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": -0.01931951195001602, | |
| "rewards/margins": 0.006707245949655771, | |
| "rewards/rejected": -0.026026759296655655, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 0.6596519627681101, | |
| "grad_norm": 6162429.013600917, | |
| "learning_rate": 1.8915879442195232e-07, | |
| "logits/chosen": -2.334224224090576, | |
| "logits/rejected": -2.3541088104248047, | |
| "logps/chosen": -136.18832397460938, | |
| "logps/rejected": -151.46710205078125, | |
| "loss": 122585.7625, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": -0.019429903477430344, | |
| "rewards/margins": 0.011553862132132053, | |
| "rewards/rejected": -0.03098376654088497, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 0.6636989073249696, | |
| "grad_norm": 4988734.781634246, | |
| "learning_rate": 1.8690958164642374e-07, | |
| "logits/chosen": -2.4520297050476074, | |
| "logits/rejected": -2.42329478263855, | |
| "logps/chosen": -144.2743682861328, | |
| "logps/rejected": -155.20095825195312, | |
| "loss": 124995.575, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.016223471611738205, | |
| "rewards/margins": 0.015287751331925392, | |
| "rewards/rejected": -0.031511224806308746, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 0.6677458518818292, | |
| "grad_norm": 6629567.008457434, | |
| "learning_rate": 1.8466036887089516e-07, | |
| "logits/chosen": -2.345116376876831, | |
| "logits/rejected": -2.348301887512207, | |
| "logps/chosen": -129.6711883544922, | |
| "logps/rejected": -151.17837524414062, | |
| "loss": 122800.6625, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.020190779119729996, | |
| "rewards/margins": 0.0185395535081625, | |
| "rewards/rejected": -0.03873033449053764, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 0.6717927964386888, | |
| "grad_norm": 5601907.507778279, | |
| "learning_rate": 1.824111560953666e-07, | |
| "logits/chosen": -2.2657582759857178, | |
| "logits/rejected": -2.260693311691284, | |
| "logps/chosen": -128.01144409179688, | |
| "logps/rejected": -155.50888061523438, | |
| "loss": 124625.45, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -0.027610983699560165, | |
| "rewards/margins": 0.020689968019723892, | |
| "rewards/rejected": -0.04830095171928406, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 0.6758397409955483, | |
| "grad_norm": 5845172.102872888, | |
| "learning_rate": 1.8016194331983805e-07, | |
| "logits/chosen": -2.289998769760132, | |
| "logits/rejected": -2.2984931468963623, | |
| "logps/chosen": -120.5185775756836, | |
| "logps/rejected": -140.23655700683594, | |
| "loss": 125251.7625, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -0.029199788346886635, | |
| "rewards/margins": 0.014481378719210625, | |
| "rewards/rejected": -0.04368116706609726, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 0.6798866855524079, | |
| "grad_norm": 6195370.996741281, | |
| "learning_rate": 1.779127305443095e-07, | |
| "logits/chosen": -2.3538708686828613, | |
| "logits/rejected": -2.334139347076416, | |
| "logps/chosen": -136.63980102539062, | |
| "logps/rejected": -144.39056396484375, | |
| "loss": 129559.5375, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -0.030268553644418716, | |
| "rewards/margins": 0.0073168775998055935, | |
| "rewards/rejected": -0.03758542984724045, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 0.6839336301092676, | |
| "grad_norm": 8790702.611604873, | |
| "learning_rate": 1.7566351776878092e-07, | |
| "logits/chosen": -2.3637337684631348, | |
| "logits/rejected": -2.3632633686065674, | |
| "logps/chosen": -129.40554809570312, | |
| "logps/rejected": -147.02755737304688, | |
| "loss": 128803.0875, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.023326028138399124, | |
| "rewards/margins": 0.011800029315054417, | |
| "rewards/rejected": -0.03512606397271156, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 0.687980574666127, | |
| "grad_norm": 8047500.436527203, | |
| "learning_rate": 1.7341430499325237e-07, | |
| "logits/chosen": -2.2449162006378174, | |
| "logits/rejected": -2.254812717437744, | |
| "logps/chosen": -128.66867065429688, | |
| "logps/rejected": -134.86846923828125, | |
| "loss": 130052.75, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.021912669762969017, | |
| "rewards/margins": 0.007869280874729156, | |
| "rewards/rejected": -0.029781952500343323, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.6920275192229867, | |
| "grad_norm": 7961693.976507484, | |
| "learning_rate": 1.7116509221772378e-07, | |
| "logits/chosen": -2.308650255203247, | |
| "logits/rejected": -2.325552463531494, | |
| "logps/chosen": -118.10685729980469, | |
| "logps/rejected": -132.20530700683594, | |
| "loss": 125613.825, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.022883836179971695, | |
| "rewards/margins": 0.00968220829963684, | |
| "rewards/rejected": -0.032566044479608536, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 0.6960744637798462, | |
| "grad_norm": 8983241.448290937, | |
| "learning_rate": 1.6891587944219523e-07, | |
| "logits/chosen": -2.3228538036346436, | |
| "logits/rejected": -2.295989990234375, | |
| "logps/chosen": -136.067138671875, | |
| "logps/rejected": -149.92922973632812, | |
| "loss": 124795.0875, | |
| "rewards/accuracies": 0.737500011920929, | |
| "rewards/chosen": -0.01805921457707882, | |
| "rewards/margins": 0.018675491213798523, | |
| "rewards/rejected": -0.03673470392823219, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 0.7001214083367058, | |
| "grad_norm": 6598233.046729883, | |
| "learning_rate": 1.6666666666666665e-07, | |
| "logits/chosen": -2.321946144104004, | |
| "logits/rejected": -2.2634482383728027, | |
| "logps/chosen": -156.81881713867188, | |
| "logps/rejected": -175.48133850097656, | |
| "loss": 124281.6625, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.023901356384158134, | |
| "rewards/margins": 0.020154178142547607, | |
| "rewards/rejected": -0.04405553638935089, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 0.7041683528935654, | |
| "grad_norm": 7393573.408673971, | |
| "learning_rate": 1.644174538911381e-07, | |
| "logits/chosen": -2.157721996307373, | |
| "logits/rejected": -2.1384575366973877, | |
| "logps/chosen": -156.61431884765625, | |
| "logps/rejected": -169.68267822265625, | |
| "loss": 123967.125, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -0.033402346074581146, | |
| "rewards/margins": 0.017039528116583824, | |
| "rewards/rejected": -0.05044187977910042, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 0.7082152974504249, | |
| "grad_norm": 8650281.232154809, | |
| "learning_rate": 1.6216824111560954e-07, | |
| "logits/chosen": -2.3099396228790283, | |
| "logits/rejected": -2.314627170562744, | |
| "logps/chosen": -140.70175170898438, | |
| "logps/rejected": -165.3170928955078, | |
| "loss": 125535.2875, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.02742253616452217, | |
| "rewards/margins": 0.015254299156367779, | |
| "rewards/rejected": -0.04267684370279312, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 0.7122622420072845, | |
| "grad_norm": 6208948.317347317, | |
| "learning_rate": 1.5991902834008096e-07, | |
| "logits/chosen": -2.3783583641052246, | |
| "logits/rejected": -2.362631320953369, | |
| "logps/chosen": -148.7383270263672, | |
| "logps/rejected": -162.62327575683594, | |
| "loss": 121080.075, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.014549913816154003, | |
| "rewards/margins": 0.013842826709151268, | |
| "rewards/rejected": -0.028392743319272995, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 0.7163091865641441, | |
| "grad_norm": 7739071.113911002, | |
| "learning_rate": 1.5766981556455238e-07, | |
| "logits/chosen": -2.299868583679199, | |
| "logits/rejected": -2.2598750591278076, | |
| "logps/chosen": -162.82052612304688, | |
| "logps/rejected": -184.3058319091797, | |
| "loss": 122385.7125, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.030940508469939232, | |
| "rewards/margins": 0.023769445717334747, | |
| "rewards/rejected": -0.05470995977520943, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 0.7203561311210036, | |
| "grad_norm": 6707665.892655141, | |
| "learning_rate": 1.5542060278902383e-07, | |
| "logits/chosen": -2.3239502906799316, | |
| "logits/rejected": -2.3085806369781494, | |
| "logps/chosen": -139.06484985351562, | |
| "logps/rejected": -157.50460815429688, | |
| "loss": 115194.475, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.03321670740842819, | |
| "rewards/margins": 0.021816464141011238, | |
| "rewards/rejected": -0.05503316968679428, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 0.7244030756778632, | |
| "grad_norm": 7475588.891135754, | |
| "learning_rate": 1.5317139001349527e-07, | |
| "logits/chosen": -2.380169630050659, | |
| "logits/rejected": -2.3587822914123535, | |
| "logps/chosen": -134.81069946289062, | |
| "logps/rejected": -149.78839111328125, | |
| "loss": 135028.0125, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": -0.03387707471847534, | |
| "rewards/margins": 0.011253075674176216, | |
| "rewards/rejected": -0.04513014853000641, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 0.7284500202347228, | |
| "grad_norm": 6224311.633435066, | |
| "learning_rate": 1.5092217723796672e-07, | |
| "logits/chosen": -2.4899191856384277, | |
| "logits/rejected": -2.461540460586548, | |
| "logps/chosen": -139.72994995117188, | |
| "logps/rejected": -154.91757202148438, | |
| "loss": 127101.55, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.022551989182829857, | |
| "rewards/margins": 0.019368382170796394, | |
| "rewards/rejected": -0.04192037135362625, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.7324969647915823, | |
| "grad_norm": 6407363.569135414, | |
| "learning_rate": 1.4867296446243814e-07, | |
| "logits/chosen": -2.457529067993164, | |
| "logits/rejected": -2.4312427043914795, | |
| "logps/chosen": -171.8442840576172, | |
| "logps/rejected": -170.40664672851562, | |
| "loss": 126581.2375, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -0.02567433752119541, | |
| "rewards/margins": 0.008936228230595589, | |
| "rewards/rejected": -0.0346105620265007, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 0.7365439093484419, | |
| "grad_norm": 5335773.687286384, | |
| "learning_rate": 1.4642375168690956e-07, | |
| "logits/chosen": -2.442826986312866, | |
| "logits/rejected": -2.424445867538452, | |
| "logps/chosen": -130.82366943359375, | |
| "logps/rejected": -150.00717163085938, | |
| "loss": 121689.35, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.020857717841863632, | |
| "rewards/margins": 0.011433606036007404, | |
| "rewards/rejected": -0.03229131922125816, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 0.7405908539053015, | |
| "grad_norm": 5919606.114162943, | |
| "learning_rate": 1.44174538911381e-07, | |
| "logits/chosen": -2.4367711544036865, | |
| "logits/rejected": -2.4152512550354004, | |
| "logps/chosen": -116.6092758178711, | |
| "logps/rejected": -137.42446899414062, | |
| "loss": 124829.175, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.018279392272233963, | |
| "rewards/margins": 0.017404617741703987, | |
| "rewards/rejected": -0.0356840081512928, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 0.744637798462161, | |
| "grad_norm": 4526671.180016859, | |
| "learning_rate": 1.4192532613585245e-07, | |
| "logits/chosen": -2.3979544639587402, | |
| "logits/rejected": -2.3597800731658936, | |
| "logps/chosen": -135.9434814453125, | |
| "logps/rejected": -138.03778076171875, | |
| "loss": 129111.95, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -0.024272512644529343, | |
| "rewards/margins": 0.012156413868069649, | |
| "rewards/rejected": -0.03642892464995384, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 0.7486847430190207, | |
| "grad_norm": 7139221.010538934, | |
| "learning_rate": 1.396761133603239e-07, | |
| "logits/chosen": -2.4428467750549316, | |
| "logits/rejected": -2.428190231323242, | |
| "logps/chosen": -123.2089614868164, | |
| "logps/rejected": -138.09390258789062, | |
| "loss": 128958.6625, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": -0.018383827060461044, | |
| "rewards/margins": 0.010701683349907398, | |
| "rewards/rejected": -0.029085511341691017, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 0.7527316875758802, | |
| "grad_norm": 7046675.547216455, | |
| "learning_rate": 1.3742690058479532e-07, | |
| "logits/chosen": -2.4591715335845947, | |
| "logits/rejected": -2.426462411880493, | |
| "logps/chosen": -133.52520751953125, | |
| "logps/rejected": -138.1920623779297, | |
| "loss": 130433.475, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.014568162150681019, | |
| "rewards/margins": 0.013440297916531563, | |
| "rewards/rejected": -0.028008460998535156, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 0.7567786321327398, | |
| "grad_norm": 6183615.802176011, | |
| "learning_rate": 1.3517768780926674e-07, | |
| "logits/chosen": -2.4390716552734375, | |
| "logits/rejected": -2.3869736194610596, | |
| "logps/chosen": -127.33221435546875, | |
| "logps/rejected": -149.60716247558594, | |
| "loss": 126095.0375, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.020692896097898483, | |
| "rewards/margins": 0.015832407400012016, | |
| "rewards/rejected": -0.03652530163526535, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 0.7608255766895994, | |
| "grad_norm": 5139359.676607012, | |
| "learning_rate": 1.3292847503373818e-07, | |
| "logits/chosen": -2.4137744903564453, | |
| "logits/rejected": -2.4111902713775635, | |
| "logps/chosen": -138.07791137695312, | |
| "logps/rejected": -150.23367309570312, | |
| "loss": 122845.4375, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.015661675482988358, | |
| "rewards/margins": 0.011621621437370777, | |
| "rewards/rejected": -0.02728329598903656, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 0.7648725212464589, | |
| "grad_norm": 5436765.081995142, | |
| "learning_rate": 1.3067926225820963e-07, | |
| "logits/chosen": -2.2953848838806152, | |
| "logits/rejected": -2.261265754699707, | |
| "logps/chosen": -131.72573852539062, | |
| "logps/rejected": -158.7117156982422, | |
| "loss": 120437.1625, | |
| "rewards/accuracies": 0.737500011920929, | |
| "rewards/chosen": -0.018344033509492874, | |
| "rewards/margins": 0.025271952152252197, | |
| "rewards/rejected": -0.04361598566174507, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 0.7689194658033185, | |
| "grad_norm": 7773151.683082246, | |
| "learning_rate": 1.2843004948268105e-07, | |
| "logits/chosen": -2.229933023452759, | |
| "logits/rejected": -2.166466474533081, | |
| "logps/chosen": -147.3013153076172, | |
| "logps/rejected": -160.14205932617188, | |
| "loss": 130466.3875, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.024005400016903877, | |
| "rewards/margins": 0.014198745600879192, | |
| "rewards/rejected": -0.038204144686460495, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.7729664103601781, | |
| "grad_norm": 6242141.939931843, | |
| "learning_rate": 1.261808367071525e-07, | |
| "logits/chosen": -2.2624001502990723, | |
| "logits/rejected": -2.229830503463745, | |
| "logps/chosen": -138.10633850097656, | |
| "logps/rejected": -152.7989044189453, | |
| "loss": 127404.4375, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -0.02552894689142704, | |
| "rewards/margins": 0.008202909491956234, | |
| "rewards/rejected": -0.033731859177351, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 0.7770133549170376, | |
| "grad_norm": 6920375.30724732, | |
| "learning_rate": 1.2393162393162394e-07, | |
| "logits/chosen": -2.350060224533081, | |
| "logits/rejected": -2.3308169841766357, | |
| "logps/chosen": -132.56320190429688, | |
| "logps/rejected": -153.30557250976562, | |
| "loss": 126830.1, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.029025813564658165, | |
| "rewards/margins": 0.019299551844596863, | |
| "rewards/rejected": -0.04832536727190018, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 0.7810602994738972, | |
| "grad_norm": 6671009.085790114, | |
| "learning_rate": 1.2168241115609536e-07, | |
| "logits/chosen": -2.2904415130615234, | |
| "logits/rejected": -2.329463481903076, | |
| "logps/chosen": -141.00816345214844, | |
| "logps/rejected": -143.0104217529297, | |
| "loss": 129713.6875, | |
| "rewards/accuracies": 0.48750001192092896, | |
| "rewards/chosen": -0.03294295817613602, | |
| "rewards/margins": 0.0017857927596196532, | |
| "rewards/rejected": -0.034728746861219406, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 0.7851072440307568, | |
| "grad_norm": 6655872.215382386, | |
| "learning_rate": 1.194331983805668e-07, | |
| "logits/chosen": -2.3119730949401855, | |
| "logits/rejected": -2.2890148162841797, | |
| "logps/chosen": -131.12327575683594, | |
| "logps/rejected": -148.35281372070312, | |
| "loss": 126911.35, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -0.024319607764482498, | |
| "rewards/margins": 0.012184834107756615, | |
| "rewards/rejected": -0.036504440009593964, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 0.7891541885876163, | |
| "grad_norm": 7036617.58409423, | |
| "learning_rate": 1.1718398560503823e-07, | |
| "logits/chosen": -2.3747105598449707, | |
| "logits/rejected": -2.3655359745025635, | |
| "logps/chosen": -127.3541259765625, | |
| "logps/rejected": -142.77593994140625, | |
| "loss": 125537.975, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -0.017514357343316078, | |
| "rewards/margins": 0.013664362952113152, | |
| "rewards/rejected": -0.03117872215807438, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 0.7932011331444759, | |
| "grad_norm": 6573765.081555526, | |
| "learning_rate": 1.1493477282950967e-07, | |
| "logits/chosen": -2.412942409515381, | |
| "logits/rejected": -2.390746593475342, | |
| "logps/chosen": -134.2810821533203, | |
| "logps/rejected": -158.460693359375, | |
| "loss": 123726.75, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -0.021586496382951736, | |
| "rewards/margins": 0.017149869352579117, | |
| "rewards/rejected": -0.03873636573553085, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 0.7972480777013355, | |
| "grad_norm": 7038673.672056439, | |
| "learning_rate": 1.1268556005398109e-07, | |
| "logits/chosen": -2.370753765106201, | |
| "logits/rejected": -2.3683507442474365, | |
| "logps/chosen": -124.46659851074219, | |
| "logps/rejected": -131.4204559326172, | |
| "loss": 126820.75, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.024847570806741714, | |
| "rewards/margins": 0.011085819453001022, | |
| "rewards/rejected": -0.03593338653445244, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 0.801295022258195, | |
| "grad_norm": 6451758.008444387, | |
| "learning_rate": 1.1043634727845254e-07, | |
| "logits/chosen": -2.325690984725952, | |
| "logits/rejected": -2.336920976638794, | |
| "logps/chosen": -122.97123718261719, | |
| "logps/rejected": -147.48297119140625, | |
| "loss": 123985.45, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.016694985330104828, | |
| "rewards/margins": 0.016988877207040787, | |
| "rewards/rejected": -0.033683862537145615, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 0.8053419668150547, | |
| "grad_norm": 9482913.906109469, | |
| "learning_rate": 1.0818713450292397e-07, | |
| "logits/chosen": -2.2602345943450928, | |
| "logits/rejected": -2.243213653564453, | |
| "logps/chosen": -122.68096923828125, | |
| "logps/rejected": -138.34278869628906, | |
| "loss": 124861.1125, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -0.028803208842873573, | |
| "rewards/margins": 0.015811622142791748, | |
| "rewards/rejected": -0.04461482912302017, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 0.8093889113719142, | |
| "grad_norm": 6927553.6188276345, | |
| "learning_rate": 1.059379217273954e-07, | |
| "logits/chosen": -2.3502438068389893, | |
| "logits/rejected": -2.337284564971924, | |
| "logps/chosen": -132.24378967285156, | |
| "logps/rejected": -149.02110290527344, | |
| "loss": 125569.4625, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.018480569124221802, | |
| "rewards/margins": 0.016136765480041504, | |
| "rewards/rejected": -0.034617334604263306, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.8134358559287738, | |
| "grad_norm": 6611093.761936569, | |
| "learning_rate": 1.0368870895186684e-07, | |
| "logits/chosen": -2.3684864044189453, | |
| "logits/rejected": -2.324704170227051, | |
| "logps/chosen": -135.81961059570312, | |
| "logps/rejected": -160.5324249267578, | |
| "loss": 121162.075, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.012911828234791756, | |
| "rewards/margins": 0.030595939606428146, | |
| "rewards/rejected": -0.04350776970386505, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 0.8174828004856334, | |
| "grad_norm": 6723882.38264995, | |
| "learning_rate": 1.0143949617633828e-07, | |
| "logits/chosen": -2.2761006355285645, | |
| "logits/rejected": -2.2606966495513916, | |
| "logps/chosen": -119.88040924072266, | |
| "logps/rejected": -145.41346740722656, | |
| "loss": 129622.7125, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.025995198637247086, | |
| "rewards/margins": 0.019534587860107422, | |
| "rewards/rejected": -0.045529790222644806, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 0.8215297450424929, | |
| "grad_norm": 8229250.060941711, | |
| "learning_rate": 9.919028340080972e-08, | |
| "logits/chosen": -2.3351616859436035, | |
| "logits/rejected": -2.280089855194092, | |
| "logps/chosen": -138.04751586914062, | |
| "logps/rejected": -154.4039306640625, | |
| "loss": 121636.6375, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": -0.02659204974770546, | |
| "rewards/margins": 0.022092049941420555, | |
| "rewards/rejected": -0.048684097826480865, | |
| "step": 2030 | |
| }, | |
| { | |
| "epoch": 0.8255766895993525, | |
| "grad_norm": 8360918.973606626, | |
| "learning_rate": 9.694107062528115e-08, | |
| "logits/chosen": -2.302302837371826, | |
| "logits/rejected": -2.3009400367736816, | |
| "logps/chosen": -133.8302459716797, | |
| "logps/rejected": -153.5994110107422, | |
| "loss": 124760.0625, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -0.020709946751594543, | |
| "rewards/margins": 0.014815042726695538, | |
| "rewards/rejected": -0.03552498668432236, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 0.8296236341562121, | |
| "grad_norm": 8136804.681969547, | |
| "learning_rate": 9.46918578497526e-08, | |
| "logits/chosen": -2.325496196746826, | |
| "logits/rejected": -2.3160691261291504, | |
| "logps/chosen": -133.07785034179688, | |
| "logps/rejected": -157.1102294921875, | |
| "loss": 122905.6875, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -0.022938497364521027, | |
| "rewards/margins": 0.02051146700978279, | |
| "rewards/rejected": -0.04344996064901352, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 0.8336705787130716, | |
| "grad_norm": 5880924.756183454, | |
| "learning_rate": 9.244264507422401e-08, | |
| "logits/chosen": -2.247741460800171, | |
| "logits/rejected": -2.2601161003112793, | |
| "logps/chosen": -138.5823974609375, | |
| "logps/rejected": -150.9891357421875, | |
| "loss": 122247.55, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -0.021218325942754745, | |
| "rewards/margins": 0.013738051056861877, | |
| "rewards/rejected": -0.034956373274326324, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 0.8377175232699312, | |
| "grad_norm": 6479318.093365777, | |
| "learning_rate": 9.019343229869546e-08, | |
| "logits/chosen": -2.287973403930664, | |
| "logits/rejected": -2.27508282661438, | |
| "logps/chosen": -148.0543975830078, | |
| "logps/rejected": -174.75563049316406, | |
| "loss": 122681.5375, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": -0.028557494282722473, | |
| "rewards/margins": 0.02148330584168434, | |
| "rewards/rejected": -0.050040800124406815, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 0.8417644678267908, | |
| "grad_norm": 7530654.549282354, | |
| "learning_rate": 8.794421952316688e-08, | |
| "logits/chosen": -2.3192243576049805, | |
| "logits/rejected": -2.301488161087036, | |
| "logps/chosen": -140.3570556640625, | |
| "logps/rejected": -144.66439819335938, | |
| "loss": 127493.5, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.027281736955046654, | |
| "rewards/margins": 0.011399330571293831, | |
| "rewards/rejected": -0.03868107125163078, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 0.8458114123836503, | |
| "grad_norm": 5545588.639352677, | |
| "learning_rate": 8.569500674763833e-08, | |
| "logits/chosen": -2.3623504638671875, | |
| "logits/rejected": -2.327298641204834, | |
| "logps/chosen": -125.07554626464844, | |
| "logps/rejected": -162.51771545410156, | |
| "loss": 122307.35, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.023772019892930984, | |
| "rewards/margins": 0.024702411144971848, | |
| "rewards/rejected": -0.048474427312612534, | |
| "step": 2090 | |
| }, | |
| { | |
| "epoch": 0.8498583569405099, | |
| "grad_norm": 7677881.561277626, | |
| "learning_rate": 8.344579397210976e-08, | |
| "logits/chosen": -2.400023937225342, | |
| "logits/rejected": -2.398374557495117, | |
| "logps/chosen": -143.50096130371094, | |
| "logps/rejected": -154.2194061279297, | |
| "loss": 126753.125, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": -0.03331100195646286, | |
| "rewards/margins": 0.007773646619170904, | |
| "rewards/rejected": -0.0410846471786499, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.8539053014973695, | |
| "grad_norm": 9639697.081950434, | |
| "learning_rate": 8.119658119658119e-08, | |
| "logits/chosen": -2.2588629722595215, | |
| "logits/rejected": -2.2181789875030518, | |
| "logps/chosen": -136.6796417236328, | |
| "logps/rejected": -171.16500854492188, | |
| "loss": 127603.3375, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -0.03126269578933716, | |
| "rewards/margins": 0.025771383196115494, | |
| "rewards/rejected": -0.057034075260162354, | |
| "step": 2110 | |
| }, | |
| { | |
| "epoch": 0.857952246054229, | |
| "grad_norm": 5314294.806910229, | |
| "learning_rate": 7.894736842105262e-08, | |
| "logits/chosen": -2.486797332763672, | |
| "logits/rejected": -2.4730780124664307, | |
| "logps/chosen": -146.88571166992188, | |
| "logps/rejected": -158.010986328125, | |
| "loss": 125283.2125, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -0.020792517811059952, | |
| "rewards/margins": 0.017872992902994156, | |
| "rewards/rejected": -0.03866551071405411, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 0.8619991906110887, | |
| "grad_norm": 8035986.499722224, | |
| "learning_rate": 7.669815564552407e-08, | |
| "logits/chosen": -2.421731472015381, | |
| "logits/rejected": -2.425063371658325, | |
| "logps/chosen": -116.5892562866211, | |
| "logps/rejected": -132.42050170898438, | |
| "loss": 125860.925, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.021887045353651047, | |
| "rewards/margins": 0.014476152136921883, | |
| "rewards/rejected": -0.03636319935321808, | |
| "step": 2130 | |
| }, | |
| { | |
| "epoch": 0.8660461351679482, | |
| "grad_norm": 6251020.741893531, | |
| "learning_rate": 7.444894286999549e-08, | |
| "logits/chosen": -2.364879608154297, | |
| "logits/rejected": -2.311974048614502, | |
| "logps/chosen": -120.81207275390625, | |
| "logps/rejected": -145.0894317626953, | |
| "loss": 119764.95, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": -0.02619818225502968, | |
| "rewards/margins": 0.023076878860592842, | |
| "rewards/rejected": -0.04927505925297737, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 0.8700930797248078, | |
| "grad_norm": 6301369.537300293, | |
| "learning_rate": 7.219973009446694e-08, | |
| "logits/chosen": -2.379647970199585, | |
| "logits/rejected": -2.3476970195770264, | |
| "logps/chosen": -134.33572387695312, | |
| "logps/rejected": -155.9193115234375, | |
| "loss": 118915.75, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -0.019399352371692657, | |
| "rewards/margins": 0.016515102237462997, | |
| "rewards/rejected": -0.03591445833444595, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 0.8741400242816674, | |
| "grad_norm": 6200780.785181898, | |
| "learning_rate": 6.995051731893837e-08, | |
| "logits/chosen": -2.4005587100982666, | |
| "logits/rejected": -2.381075382232666, | |
| "logps/chosen": -134.69631958007812, | |
| "logps/rejected": -142.3704071044922, | |
| "loss": 122057.2, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -0.024357806891202927, | |
| "rewards/margins": 0.01147081982344389, | |
| "rewards/rejected": -0.03582862392067909, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 0.8781869688385269, | |
| "grad_norm": 7707200.943905766, | |
| "learning_rate": 6.77013045434098e-08, | |
| "logits/chosen": -2.1691622734069824, | |
| "logits/rejected": -2.1518099308013916, | |
| "logps/chosen": -133.11085510253906, | |
| "logps/rejected": -150.32522583007812, | |
| "loss": 124932.0875, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -0.02330431155860424, | |
| "rewards/margins": 0.0101194242015481, | |
| "rewards/rejected": -0.033423732966184616, | |
| "step": 2170 | |
| }, | |
| { | |
| "epoch": 0.8822339133953865, | |
| "grad_norm": 6068434.174209909, | |
| "learning_rate": 6.545209176788123e-08, | |
| "logits/chosen": -2.2563586235046387, | |
| "logits/rejected": -2.249168872833252, | |
| "logps/chosen": -126.6658935546875, | |
| "logps/rejected": -150.95640563964844, | |
| "loss": 125160.825, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.015440529212355614, | |
| "rewards/margins": 0.015524588525295258, | |
| "rewards/rejected": -0.030965115875005722, | |
| "step": 2180 | |
| }, | |
| { | |
| "epoch": 0.8862808579522461, | |
| "grad_norm": 7537201.72606691, | |
| "learning_rate": 6.320287899235267e-08, | |
| "logits/chosen": -2.364108085632324, | |
| "logits/rejected": -2.3574013710021973, | |
| "logps/chosen": -127.92137145996094, | |
| "logps/rejected": -143.64276123046875, | |
| "loss": 128988.85, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -0.015308101661503315, | |
| "rewards/margins": 0.012061825022101402, | |
| "rewards/rejected": -0.027369925752282143, | |
| "step": 2190 | |
| }, | |
| { | |
| "epoch": 0.8903278025091056, | |
| "grad_norm": 20144650.289658338, | |
| "learning_rate": 6.095366621682411e-08, | |
| "logits/chosen": -2.3388938903808594, | |
| "logits/rejected": -2.309027910232544, | |
| "logps/chosen": -131.75338745117188, | |
| "logps/rejected": -147.02613830566406, | |
| "loss": 131861.55, | |
| "rewards/accuracies": 0.5249999761581421, | |
| "rewards/chosen": -0.033888086676597595, | |
| "rewards/margins": 0.011972433887422085, | |
| "rewards/rejected": -0.045860521495342255, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.8943747470659652, | |
| "grad_norm": 7275705.0028550355, | |
| "learning_rate": 5.8704453441295546e-08, | |
| "logits/chosen": -2.4196584224700928, | |
| "logits/rejected": -2.4083645343780518, | |
| "logps/chosen": -137.82431030273438, | |
| "logps/rejected": -152.37692260742188, | |
| "loss": 125663.2125, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -0.01876234821975231, | |
| "rewards/margins": 0.0189601369202137, | |
| "rewards/rejected": -0.03772248700261116, | |
| "step": 2210 | |
| }, | |
| { | |
| "epoch": 0.8984216916228248, | |
| "grad_norm": 7203116.155779993, | |
| "learning_rate": 5.645524066576698e-08, | |
| "logits/chosen": -2.4381861686706543, | |
| "logits/rejected": -2.403198003768921, | |
| "logps/chosen": -131.7117156982422, | |
| "logps/rejected": -142.67185974121094, | |
| "loss": 123239.9375, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.01855655573308468, | |
| "rewards/margins": 0.015223322436213493, | |
| "rewards/rejected": -0.03377988189458847, | |
| "step": 2220 | |
| }, | |
| { | |
| "epoch": 0.9024686361796843, | |
| "grad_norm": 5199237.236235915, | |
| "learning_rate": 5.420602789023841e-08, | |
| "logits/chosen": -2.3613171577453613, | |
| "logits/rejected": -2.2851357460021973, | |
| "logps/chosen": -153.357177734375, | |
| "logps/rejected": -159.00067138671875, | |
| "loss": 123367.6125, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.024445852264761925, | |
| "rewards/margins": 0.012765263207256794, | |
| "rewards/rejected": -0.037211112678050995, | |
| "step": 2230 | |
| }, | |
| { | |
| "epoch": 0.9065155807365439, | |
| "grad_norm": 8322118.880155748, | |
| "learning_rate": 5.1956815114709844e-08, | |
| "logits/chosen": -2.4463276863098145, | |
| "logits/rejected": -2.4448184967041016, | |
| "logps/chosen": -166.32937622070312, | |
| "logps/rejected": -170.3524932861328, | |
| "loss": 127037.6, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.034686215221881866, | |
| "rewards/margins": 0.006833164487034082, | |
| "rewards/rejected": -0.04151938110589981, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 0.9105625252934035, | |
| "grad_norm": 7843111.5615214445, | |
| "learning_rate": 4.9707602339181284e-08, | |
| "logits/chosen": -2.406442165374756, | |
| "logits/rejected": -2.3739068508148193, | |
| "logps/chosen": -135.46450805664062, | |
| "logps/rejected": -151.98318481445312, | |
| "loss": 119829.0, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.027578959241509438, | |
| "rewards/margins": 0.019721323624253273, | |
| "rewards/rejected": -0.04730028659105301, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 0.914609469850263, | |
| "grad_norm": 6508134.384511007, | |
| "learning_rate": 4.745838956365272e-08, | |
| "logits/chosen": -2.374009609222412, | |
| "logits/rejected": -2.330867290496826, | |
| "logps/chosen": -147.81002807617188, | |
| "logps/rejected": -150.0068817138672, | |
| "loss": 123565.525, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.030267197638750076, | |
| "rewards/margins": 0.008453629910945892, | |
| "rewards/rejected": -0.03872082382440567, | |
| "step": 2260 | |
| }, | |
| { | |
| "epoch": 0.9186564144071226, | |
| "grad_norm": 4876699.168643324, | |
| "learning_rate": 4.5209176788124156e-08, | |
| "logits/chosen": -2.4444994926452637, | |
| "logits/rejected": -2.370756149291992, | |
| "logps/chosen": -148.59286499023438, | |
| "logps/rejected": -159.95957946777344, | |
| "loss": 121402.275, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.027829691767692566, | |
| "rewards/margins": 0.01744781993329525, | |
| "rewards/rejected": -0.045277513563632965, | |
| "step": 2270 | |
| }, | |
| { | |
| "epoch": 0.9227033589639821, | |
| "grad_norm": 7528661.562948466, | |
| "learning_rate": 4.2959964012595596e-08, | |
| "logits/chosen": -2.421567678451538, | |
| "logits/rejected": -2.4027442932128906, | |
| "logps/chosen": -138.11740112304688, | |
| "logps/rejected": -148.74745178222656, | |
| "loss": 125506.075, | |
| "rewards/accuracies": 0.5375000238418579, | |
| "rewards/chosen": -0.025351068004965782, | |
| "rewards/margins": 0.01246053259819746, | |
| "rewards/rejected": -0.037811603397130966, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 0.9267503035208418, | |
| "grad_norm": 5869282.00575302, | |
| "learning_rate": 4.071075123706703e-08, | |
| "logits/chosen": -2.344552993774414, | |
| "logits/rejected": -2.309044122695923, | |
| "logps/chosen": -135.99095153808594, | |
| "logps/rejected": -162.00845336914062, | |
| "loss": 119300.3375, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -0.01940598525106907, | |
| "rewards/margins": 0.02190936915576458, | |
| "rewards/rejected": -0.04131535068154335, | |
| "step": 2290 | |
| }, | |
| { | |
| "epoch": 0.9307972480777014, | |
| "grad_norm": 6508908.496606901, | |
| "learning_rate": 3.846153846153846e-08, | |
| "logits/chosen": -2.2711312770843506, | |
| "logits/rejected": -2.235738754272461, | |
| "logps/chosen": -156.75537109375, | |
| "logps/rejected": -162.16207885742188, | |
| "loss": 121451.2625, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.0278861615806818, | |
| "rewards/margins": 0.010201343335211277, | |
| "rewards/rejected": -0.038087502121925354, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 0.9348441926345609, | |
| "grad_norm": 8865730.632557675, | |
| "learning_rate": 3.6212325686009894e-08, | |
| "logits/chosen": -2.324096202850342, | |
| "logits/rejected": -2.2920069694519043, | |
| "logps/chosen": -117.9946060180664, | |
| "logps/rejected": -133.47108459472656, | |
| "loss": 122291.0625, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.022024232894182205, | |
| "rewards/margins": 0.014470313675701618, | |
| "rewards/rejected": -0.03649454563856125, | |
| "step": 2310 | |
| }, | |
| { | |
| "epoch": 0.9388911371914205, | |
| "grad_norm": 9971453.387611723, | |
| "learning_rate": 3.3963112910481334e-08, | |
| "logits/chosen": -2.409850597381592, | |
| "logits/rejected": -2.3329081535339355, | |
| "logps/chosen": -146.290283203125, | |
| "logps/rejected": -182.57904052734375, | |
| "loss": 120819.0, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.024361763149499893, | |
| "rewards/margins": 0.03630813583731651, | |
| "rewards/rejected": -0.060669898986816406, | |
| "step": 2320 | |
| }, | |
| { | |
| "epoch": 0.9429380817482801, | |
| "grad_norm": 6070350.046980263, | |
| "learning_rate": 3.1713900134952766e-08, | |
| "logits/chosen": -2.34000301361084, | |
| "logits/rejected": -2.3207154273986816, | |
| "logps/chosen": -135.71852111816406, | |
| "logps/rejected": -158.15370178222656, | |
| "loss": 127869.825, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.026078131049871445, | |
| "rewards/margins": 0.018510058522224426, | |
| "rewards/rejected": -0.04458818584680557, | |
| "step": 2330 | |
| }, | |
| { | |
| "epoch": 0.9469850263051396, | |
| "grad_norm": 8157133.764333476, | |
| "learning_rate": 2.94646873594242e-08, | |
| "logits/chosen": -2.4080350399017334, | |
| "logits/rejected": -2.3741536140441895, | |
| "logps/chosen": -139.84112548828125, | |
| "logps/rejected": -175.55917358398438, | |
| "loss": 126005.05, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -0.029435906559228897, | |
| "rewards/margins": 0.02673642337322235, | |
| "rewards/rejected": -0.05617233365774155, | |
| "step": 2340 | |
| }, | |
| { | |
| "epoch": 0.9510319708619992, | |
| "grad_norm": 8440293.351897202, | |
| "learning_rate": 2.7215474583895635e-08, | |
| "logits/chosen": -2.4163994789123535, | |
| "logits/rejected": -2.384222984313965, | |
| "logps/chosen": -158.38975524902344, | |
| "logps/rejected": -164.0137176513672, | |
| "loss": 123603.3875, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": -0.03101927414536476, | |
| "rewards/margins": 0.011726012453436852, | |
| "rewards/rejected": -0.042745284736156464, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 0.9550789154188588, | |
| "grad_norm": 6375292.788715957, | |
| "learning_rate": 2.496626180836707e-08, | |
| "logits/chosen": -2.2860450744628906, | |
| "logits/rejected": -2.28193998336792, | |
| "logps/chosen": -137.55361938476562, | |
| "logps/rejected": -162.74652099609375, | |
| "loss": 125693.3125, | |
| "rewards/accuracies": 0.737500011920929, | |
| "rewards/chosen": -0.024988356977701187, | |
| "rewards/margins": 0.023718636482954025, | |
| "rewards/rejected": -0.04870699718594551, | |
| "step": 2360 | |
| }, | |
| { | |
| "epoch": 0.9591258599757183, | |
| "grad_norm": 9472423.8957588, | |
| "learning_rate": 2.2717049032838504e-08, | |
| "logits/chosen": -2.359046459197998, | |
| "logits/rejected": -2.3519136905670166, | |
| "logps/chosen": -135.88697814941406, | |
| "logps/rejected": -158.06321716308594, | |
| "loss": 127380.25, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -0.027975231409072876, | |
| "rewards/margins": 0.01904093287885189, | |
| "rewards/rejected": -0.04701615869998932, | |
| "step": 2370 | |
| }, | |
| { | |
| "epoch": 0.9631728045325779, | |
| "grad_norm": 6950351.310431428, | |
| "learning_rate": 2.046783625730994e-08, | |
| "logits/chosen": -2.278303623199463, | |
| "logits/rejected": -2.271866798400879, | |
| "logps/chosen": -147.02255249023438, | |
| "logps/rejected": -162.41444396972656, | |
| "loss": 131236.1875, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": -0.03206818923354149, | |
| "rewards/margins": 0.013242989778518677, | |
| "rewards/rejected": -0.045311179012060165, | |
| "step": 2380 | |
| }, | |
| { | |
| "epoch": 0.9672197490894374, | |
| "grad_norm": 6190616.100642323, | |
| "learning_rate": 1.8218623481781373e-08, | |
| "logits/chosen": -2.3274073600769043, | |
| "logits/rejected": -2.2292959690093994, | |
| "logps/chosen": -152.0672149658203, | |
| "logps/rejected": -174.8033905029297, | |
| "loss": 124131.975, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.027064388617873192, | |
| "rewards/margins": 0.015537412837147713, | |
| "rewards/rejected": -0.042601801455020905, | |
| "step": 2390 | |
| }, | |
| { | |
| "epoch": 0.971266693646297, | |
| "grad_norm": 8140978.954292232, | |
| "learning_rate": 1.5969410706252813e-08, | |
| "logits/chosen": -2.3674769401550293, | |
| "logits/rejected": -2.3571083545684814, | |
| "logps/chosen": -144.0354461669922, | |
| "logps/rejected": -160.3311004638672, | |
| "loss": 125102.2375, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.02302565984427929, | |
| "rewards/margins": 0.0166311115026474, | |
| "rewards/rejected": -0.03965677320957184, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.9753136382031566, | |
| "grad_norm": 8862305.552745355, | |
| "learning_rate": 1.3720197930724246e-08, | |
| "logits/chosen": -2.178356647491455, | |
| "logits/rejected": -2.179384708404541, | |
| "logps/chosen": -143.9452362060547, | |
| "logps/rejected": -151.89974975585938, | |
| "loss": 123180.975, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.035528309643268585, | |
| "rewards/margins": 0.007553645875304937, | |
| "rewards/rejected": -0.04308196157217026, | |
| "step": 2410 | |
| }, | |
| { | |
| "epoch": 0.9793605827600161, | |
| "grad_norm": 4848306.613269352, | |
| "learning_rate": 1.1470985155195682e-08, | |
| "logits/chosen": -2.402296781539917, | |
| "logits/rejected": -2.3765056133270264, | |
| "logps/chosen": -125.8743896484375, | |
| "logps/rejected": -145.01162719726562, | |
| "loss": 122925.2125, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": -0.017237985506653786, | |
| "rewards/margins": 0.019860463216900826, | |
| "rewards/rejected": -0.03709845244884491, | |
| "step": 2420 | |
| }, | |
| { | |
| "epoch": 0.9834075273168758, | |
| "grad_norm": 5809904.408249709, | |
| "learning_rate": 9.221772379667116e-09, | |
| "logits/chosen": -2.4065396785736084, | |
| "logits/rejected": -2.3716368675231934, | |
| "logps/chosen": -143.05075073242188, | |
| "logps/rejected": -167.95664978027344, | |
| "loss": 124604.825, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.029827838763594627, | |
| "rewards/margins": 0.02450350485742092, | |
| "rewards/rejected": -0.05433133989572525, | |
| "step": 2430 | |
| }, | |
| { | |
| "epoch": 0.9874544718737354, | |
| "grad_norm": 7148593.283376818, | |
| "learning_rate": 6.972559604138551e-09, | |
| "logits/chosen": -2.3499531745910645, | |
| "logits/rejected": -2.3520779609680176, | |
| "logps/chosen": -130.91500854492188, | |
| "logps/rejected": -159.89820861816406, | |
| "loss": 119113.2625, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -0.026112830266356468, | |
| "rewards/margins": 0.027585214003920555, | |
| "rewards/rejected": -0.05369805172085762, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 0.9915014164305949, | |
| "grad_norm": 7332505.899956737, | |
| "learning_rate": 4.723346828609986e-09, | |
| "logits/chosen": -2.3807873725891113, | |
| "logits/rejected": -2.3282299041748047, | |
| "logps/chosen": -138.15525817871094, | |
| "logps/rejected": -150.75531005859375, | |
| "loss": 124786.675, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.02242584154009819, | |
| "rewards/margins": 0.013798736035823822, | |
| "rewards/rejected": -0.03622458130121231, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 0.9955483609874545, | |
| "grad_norm": 5408793.194556523, | |
| "learning_rate": 2.474134053081421e-09, | |
| "logits/chosen": -2.305051803588867, | |
| "logits/rejected": -2.2709367275238037, | |
| "logps/chosen": -127.0162124633789, | |
| "logps/rejected": -154.36273193359375, | |
| "loss": 125528.575, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.02522462233901024, | |
| "rewards/margins": 0.017697211354970932, | |
| "rewards/rejected": -0.04292182996869087, | |
| "step": 2460 | |
| }, | |
| { | |
| "epoch": 0.9995953055443141, | |
| "grad_norm": 6547393.597919743, | |
| "learning_rate": 2.249212775528565e-10, | |
| "logits/chosen": -2.3911020755767822, | |
| "logits/rejected": -2.3864622116088867, | |
| "logps/chosen": -147.30072021484375, | |
| "logps/rejected": -168.179443359375, | |
| "loss": 121667.85, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.027930116280913353, | |
| "rewards/margins": 0.010017314925789833, | |
| "rewards/rejected": -0.037947431206703186, | |
| "step": 2470 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 2471, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 8, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |