| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 1.0, | |
| "eval_steps": 100, | |
| "global_step": 3821, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0, | |
| "grad_norm": 3.8125, | |
| "learning_rate": 1.3054830287206268e-08, | |
| "logits/chosen": -2.377302885055542, | |
| "logits/rejected": -2.2193148136138916, | |
| "logps/chosen": -290.4185485839844, | |
| "logps/rejected": -374.6668701171875, | |
| "loss": 0.6931, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "grad_norm": 2.40625, | |
| "learning_rate": 1.3054830287206266e-07, | |
| "logits/chosen": -2.2492425441741943, | |
| "logits/rejected": -2.0517687797546387, | |
| "logps/chosen": -279.6344909667969, | |
| "logps/rejected": -245.47564697265625, | |
| "loss": 0.6928, | |
| "rewards/accuracies": 0.4930555522441864, | |
| "rewards/chosen": 0.0005959311965852976, | |
| "rewards/margins": 0.000615339376963675, | |
| "rewards/rejected": -1.9408274965826422e-05, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "grad_norm": 2.5, | |
| "learning_rate": 2.610966057441253e-07, | |
| "logits/chosen": -2.244947671890259, | |
| "logits/rejected": -1.943969964981079, | |
| "logps/chosen": -305.4734802246094, | |
| "logps/rejected": -237.70083618164062, | |
| "loss": 0.6928, | |
| "rewards/accuracies": 0.59375, | |
| "rewards/chosen": 0.004091521259397268, | |
| "rewards/margins": 0.000647729029878974, | |
| "rewards/rejected": 0.0034437919966876507, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "grad_norm": 2.3125, | |
| "learning_rate": 3.9164490861618804e-07, | |
| "logits/chosen": -2.205514907836914, | |
| "logits/rejected": -2.1370320320129395, | |
| "logps/chosen": -251.25662231445312, | |
| "logps/rejected": -251.41213989257812, | |
| "loss": 0.6922, | |
| "rewards/accuracies": 0.59375, | |
| "rewards/chosen": 0.011662699282169342, | |
| "rewards/margins": 0.0018940108129754663, | |
| "rewards/rejected": 0.00976868998259306, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "grad_norm": 1.9453125, | |
| "learning_rate": 5.221932114882506e-07, | |
| "logits/chosen": -2.0618391036987305, | |
| "logits/rejected": -2.0241973400115967, | |
| "logps/chosen": -216.21438598632812, | |
| "logps/rejected": -221.6951141357422, | |
| "loss": 0.6915, | |
| "rewards/accuracies": 0.581250011920929, | |
| "rewards/chosen": 0.019298259168863297, | |
| "rewards/margins": 0.0034350629430264235, | |
| "rewards/rejected": 0.015863195061683655, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "grad_norm": 2.078125, | |
| "learning_rate": 6.527415143603135e-07, | |
| "logits/chosen": -2.1124298572540283, | |
| "logits/rejected": -2.1008057594299316, | |
| "logps/chosen": -266.8966064453125, | |
| "logps/rejected": -234.32998657226562, | |
| "loss": 0.6906, | |
| "rewards/accuracies": 0.581250011920929, | |
| "rewards/chosen": 0.029909158125519753, | |
| "rewards/margins": 0.005118774715811014, | |
| "rewards/rejected": 0.024790380150079727, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "grad_norm": 2.125, | |
| "learning_rate": 7.832898172323761e-07, | |
| "logits/chosen": -2.099602460861206, | |
| "logits/rejected": -1.9424635171890259, | |
| "logps/chosen": -252.27310180664062, | |
| "logps/rejected": -226.72030639648438, | |
| "loss": 0.6897, | |
| "rewards/accuracies": 0.6187499761581421, | |
| "rewards/chosen": 0.03226853534579277, | |
| "rewards/margins": 0.007134293206036091, | |
| "rewards/rejected": 0.025134241208434105, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "grad_norm": 2.03125, | |
| "learning_rate": 9.138381201044387e-07, | |
| "logits/chosen": -2.2440109252929688, | |
| "logits/rejected": -2.036339282989502, | |
| "logps/chosen": -272.09234619140625, | |
| "logps/rejected": -246.6947784423828, | |
| "loss": 0.6881, | |
| "rewards/accuracies": 0.6187499761581421, | |
| "rewards/chosen": 0.04063863307237625, | |
| "rewards/margins": 0.010386193171143532, | |
| "rewards/rejected": 0.03025243617594242, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "grad_norm": 2.359375, | |
| "learning_rate": 1.0443864229765013e-06, | |
| "logits/chosen": -2.1541717052459717, | |
| "logits/rejected": -1.9777501821517944, | |
| "logps/chosen": -257.61871337890625, | |
| "logps/rejected": -246.86483764648438, | |
| "loss": 0.6874, | |
| "rewards/accuracies": 0.65625, | |
| "rewards/chosen": 0.038099195808172226, | |
| "rewards/margins": 0.011846454814076424, | |
| "rewards/rejected": 0.026252740994095802, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "grad_norm": 2.1875, | |
| "learning_rate": 1.1749347258485642e-06, | |
| "logits/chosen": -2.1348958015441895, | |
| "logits/rejected": -1.998792290687561, | |
| "logps/chosen": -250.1610107421875, | |
| "logps/rejected": -234.56787109375, | |
| "loss": 0.6846, | |
| "rewards/accuracies": 0.6812499761581421, | |
| "rewards/chosen": 0.04175186529755592, | |
| "rewards/margins": 0.01758204773068428, | |
| "rewards/rejected": 0.024169817566871643, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "grad_norm": 2.125, | |
| "learning_rate": 1.305483028720627e-06, | |
| "logits/chosen": -2.1793951988220215, | |
| "logits/rejected": -2.0686168670654297, | |
| "logps/chosen": -247.0215301513672, | |
| "logps/rejected": -230.79537963867188, | |
| "loss": 0.6821, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": 0.04747994989156723, | |
| "rewards/margins": 0.0229250006377697, | |
| "rewards/rejected": 0.024554943665862083, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "eval_logits/chosen": -2.0950841903686523, | |
| "eval_logits/rejected": -1.9557065963745117, | |
| "eval_logps/chosen": -259.6705627441406, | |
| "eval_logps/rejected": -241.93917846679688, | |
| "eval_loss": 0.6820979714393616, | |
| "eval_rewards/accuracies": 0.656499981880188, | |
| "eval_rewards/chosen": 0.04981444031000137, | |
| "eval_rewards/margins": 0.02312026545405388, | |
| "eval_rewards/rejected": 0.026694171130657196, | |
| "eval_runtime": 385.815, | |
| "eval_samples_per_second": 5.184, | |
| "eval_steps_per_second": 0.648, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "grad_norm": 2.3125, | |
| "learning_rate": 1.4360313315926894e-06, | |
| "logits/chosen": -2.146080493927002, | |
| "logits/rejected": -2.002453327178955, | |
| "logps/chosen": -284.4079895019531, | |
| "logps/rejected": -238.9375457763672, | |
| "loss": 0.6791, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": 0.04955831170082092, | |
| "rewards/margins": 0.029538575559854507, | |
| "rewards/rejected": 0.020019738003611565, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "grad_norm": 2.15625, | |
| "learning_rate": 1.5665796344647521e-06, | |
| "logits/chosen": -2.1928741931915283, | |
| "logits/rejected": -2.0533928871154785, | |
| "logps/chosen": -287.5110778808594, | |
| "logps/rejected": -271.9446716308594, | |
| "loss": 0.6728, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": 0.05533873289823532, | |
| "rewards/margins": 0.04249165579676628, | |
| "rewards/rejected": 0.01284707523882389, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "grad_norm": 2.671875, | |
| "learning_rate": 1.6971279373368146e-06, | |
| "logits/chosen": -2.2082314491271973, | |
| "logits/rejected": -2.118213653564453, | |
| "logps/chosen": -250.14013671875, | |
| "logps/rejected": -252.6034393310547, | |
| "loss": 0.6701, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": 0.05011880397796631, | |
| "rewards/margins": 0.048879969865083694, | |
| "rewards/rejected": 0.001238831551745534, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "grad_norm": 2.5, | |
| "learning_rate": 1.8276762402088774e-06, | |
| "logits/chosen": -2.24537992477417, | |
| "logits/rejected": -1.9110206365585327, | |
| "logps/chosen": -270.5356750488281, | |
| "logps/rejected": -226.2827606201172, | |
| "loss": 0.6686, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": 0.04202268272638321, | |
| "rewards/margins": 0.052745603024959564, | |
| "rewards/rejected": -0.010722924955189228, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "grad_norm": 2.65625, | |
| "learning_rate": 1.9582245430809403e-06, | |
| "logits/chosen": -2.264875888824463, | |
| "logits/rejected": -2.0387892723083496, | |
| "logps/chosen": -280.36077880859375, | |
| "logps/rejected": -242.8515625, | |
| "loss": 0.6676, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": 0.036501698195934296, | |
| "rewards/margins": 0.05586882680654526, | |
| "rewards/rejected": -0.019367124885320663, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "grad_norm": 2.71875, | |
| "learning_rate": 2.0887728459530026e-06, | |
| "logits/chosen": -2.15449595451355, | |
| "logits/rejected": -2.0523486137390137, | |
| "logps/chosen": -256.1204833984375, | |
| "logps/rejected": -261.9712219238281, | |
| "loss": 0.6686, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": 0.007034213747829199, | |
| "rewards/margins": 0.0563817024230957, | |
| "rewards/rejected": -0.04934748262166977, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "grad_norm": 2.890625, | |
| "learning_rate": 2.2193211488250653e-06, | |
| "logits/chosen": -2.1238508224487305, | |
| "logits/rejected": -1.9688222408294678, | |
| "logps/chosen": -220.9573211669922, | |
| "logps/rejected": -228.40869140625, | |
| "loss": 0.6703, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.003666641190648079, | |
| "rewards/margins": 0.05213465169072151, | |
| "rewards/rejected": -0.05580129101872444, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "grad_norm": 3.28125, | |
| "learning_rate": 2.3498694516971284e-06, | |
| "logits/chosen": -2.1223385334014893, | |
| "logits/rejected": -1.9868714809417725, | |
| "logps/chosen": -258.9825134277344, | |
| "logps/rejected": -252.4698944091797, | |
| "loss": 0.6638, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.03278004750609398, | |
| "rewards/margins": 0.06821247935295105, | |
| "rewards/rejected": -0.10099252313375473, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "grad_norm": 3.890625, | |
| "learning_rate": 2.4804177545691907e-06, | |
| "logits/chosen": -2.2460696697235107, | |
| "logits/rejected": -2.0304675102233887, | |
| "logps/chosen": -274.5130920410156, | |
| "logps/rejected": -256.2106628417969, | |
| "loss": 0.65, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.04428885504603386, | |
| "rewards/margins": 0.10040076822042465, | |
| "rewards/rejected": -0.1446896344423294, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "grad_norm": 3.375, | |
| "learning_rate": 2.610966057441254e-06, | |
| "logits/chosen": -2.1960341930389404, | |
| "logits/rejected": -1.95565927028656, | |
| "logps/chosen": -259.01934814453125, | |
| "logps/rejected": -231.2660369873047, | |
| "loss": 0.6496, | |
| "rewards/accuracies": 0.668749988079071, | |
| "rewards/chosen": -0.09310005605220795, | |
| "rewards/margins": 0.10362167656421661, | |
| "rewards/rejected": -0.19672173261642456, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "eval_logits/chosen": -2.068035125732422, | |
| "eval_logits/rejected": -1.9312690496444702, | |
| "eval_logps/chosen": -270.0797119140625, | |
| "eval_logps/rejected": -260.6905517578125, | |
| "eval_loss": 0.6486819982528687, | |
| "eval_rewards/accuracies": 0.6809999942779541, | |
| "eval_rewards/chosen": -0.05427735298871994, | |
| "eval_rewards/margins": 0.10654205083847046, | |
| "eval_rewards/rejected": -0.160819411277771, | |
| "eval_runtime": 385.2774, | |
| "eval_samples_per_second": 5.191, | |
| "eval_steps_per_second": 0.649, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "grad_norm": 3.484375, | |
| "learning_rate": 2.741514360313316e-06, | |
| "logits/chosen": -2.197986602783203, | |
| "logits/rejected": -1.9808934926986694, | |
| "logps/chosen": -267.27685546875, | |
| "logps/rejected": -249.9297637939453, | |
| "loss": 0.6319, | |
| "rewards/accuracies": 0.706250011920929, | |
| "rewards/chosen": -0.05056775361299515, | |
| "rewards/margins": 0.14322780072689056, | |
| "rewards/rejected": -0.1937955617904663, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "grad_norm": 5.09375, | |
| "learning_rate": 2.872062663185379e-06, | |
| "logits/chosen": -2.0990307331085205, | |
| "logits/rejected": -1.983565330505371, | |
| "logps/chosen": -270.3437194824219, | |
| "logps/rejected": -256.6988525390625, | |
| "loss": 0.6401, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.16327962279319763, | |
| "rewards/margins": 0.12751872837543488, | |
| "rewards/rejected": -0.2907983660697937, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "grad_norm": 5.0625, | |
| "learning_rate": 3.0026109660574416e-06, | |
| "logits/chosen": -2.2433676719665527, | |
| "logits/rejected": -2.056224822998047, | |
| "logps/chosen": -314.1068420410156, | |
| "logps/rejected": -288.00250244140625, | |
| "loss": 0.6589, | |
| "rewards/accuracies": 0.6312500238418579, | |
| "rewards/chosen": -0.07937607169151306, | |
| "rewards/margins": 0.09387041628360748, | |
| "rewards/rejected": -0.17324648797512054, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "grad_norm": 5.6875, | |
| "learning_rate": 3.1331592689295043e-06, | |
| "logits/chosen": -2.1602792739868164, | |
| "logits/rejected": -1.9714686870574951, | |
| "logps/chosen": -310.117919921875, | |
| "logps/rejected": -308.3526916503906, | |
| "loss": 0.6431, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.13817985355854034, | |
| "rewards/margins": 0.13379593193531036, | |
| "rewards/rejected": -0.2719758152961731, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "grad_norm": 5.4375, | |
| "learning_rate": 3.263707571801567e-06, | |
| "logits/chosen": -2.129748821258545, | |
| "logits/rejected": -2.028604030609131, | |
| "logps/chosen": -282.7078552246094, | |
| "logps/rejected": -272.08837890625, | |
| "loss": 0.6361, | |
| "rewards/accuracies": 0.71875, | |
| "rewards/chosen": -0.23127944767475128, | |
| "rewards/margins": 0.14839713275432587, | |
| "rewards/rejected": -0.37967658042907715, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "grad_norm": 3.65625, | |
| "learning_rate": 3.3942558746736293e-06, | |
| "logits/chosen": -2.183048725128174, | |
| "logits/rejected": -1.9789161682128906, | |
| "logps/chosen": -281.8155212402344, | |
| "logps/rejected": -272.23956298828125, | |
| "loss": 0.6437, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.09410645067691803, | |
| "rewards/margins": 0.12439638376235962, | |
| "rewards/rejected": -0.21850283443927765, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "grad_norm": 3.921875, | |
| "learning_rate": 3.524804177545692e-06, | |
| "logits/chosen": -2.083225965499878, | |
| "logits/rejected": -1.9568647146224976, | |
| "logps/chosen": -275.0286560058594, | |
| "logps/rejected": -263.38140869140625, | |
| "loss": 0.6139, | |
| "rewards/accuracies": 0.668749988079071, | |
| "rewards/chosen": -0.14354154467582703, | |
| "rewards/margins": 0.19759733974933624, | |
| "rewards/rejected": -0.3411388695240021, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "grad_norm": 4.8125, | |
| "learning_rate": 3.6553524804177547e-06, | |
| "logits/chosen": -2.1453604698181152, | |
| "logits/rejected": -1.9743705987930298, | |
| "logps/chosen": -287.78057861328125, | |
| "logps/rejected": -284.1526794433594, | |
| "loss": 0.6277, | |
| "rewards/accuracies": 0.65625, | |
| "rewards/chosen": -0.28831422328948975, | |
| "rewards/margins": 0.17838594317436218, | |
| "rewards/rejected": -0.4667002260684967, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "grad_norm": 4.15625, | |
| "learning_rate": 3.7859007832898174e-06, | |
| "logits/chosen": -2.068016529083252, | |
| "logits/rejected": -1.9705654382705688, | |
| "logps/chosen": -315.2586364746094, | |
| "logps/rejected": -313.2366027832031, | |
| "loss": 0.6125, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.2962488532066345, | |
| "rewards/margins": 0.2308805286884308, | |
| "rewards/rejected": -0.5271294116973877, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "grad_norm": 5.8125, | |
| "learning_rate": 3.9164490861618806e-06, | |
| "logits/chosen": -2.1018004417419434, | |
| "logits/rejected": -1.8998439311981201, | |
| "logps/chosen": -275.9500732421875, | |
| "logps/rejected": -287.0372009277344, | |
| "loss": 0.6042, | |
| "rewards/accuracies": 0.706250011920929, | |
| "rewards/chosen": -0.31821924448013306, | |
| "rewards/margins": 0.24809296429157257, | |
| "rewards/rejected": -0.5663121938705444, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "eval_logits/chosen": -2.0229153633117676, | |
| "eval_logits/rejected": -1.889541745185852, | |
| "eval_logps/chosen": -295.1513671875, | |
| "eval_logps/rejected": -296.011474609375, | |
| "eval_loss": 0.6216087937355042, | |
| "eval_rewards/accuracies": 0.6729999780654907, | |
| "eval_rewards/chosen": -0.30499377846717834, | |
| "eval_rewards/margins": 0.2090347856283188, | |
| "eval_rewards/rejected": -0.5140285491943359, | |
| "eval_runtime": 385.3276, | |
| "eval_samples_per_second": 5.19, | |
| "eval_steps_per_second": 0.649, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "grad_norm": 6.78125, | |
| "learning_rate": 4.046997389033943e-06, | |
| "logits/chosen": -2.246411085128784, | |
| "logits/rejected": -2.0464656352996826, | |
| "logps/chosen": -320.37054443359375, | |
| "logps/rejected": -296.6560363769531, | |
| "loss": 0.5823, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": -0.31464242935180664, | |
| "rewards/margins": 0.29925835132598877, | |
| "rewards/rejected": -0.6139007806777954, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "grad_norm": 4.90625, | |
| "learning_rate": 4.177545691906005e-06, | |
| "logits/chosen": -2.1202454566955566, | |
| "logits/rejected": -1.933571457862854, | |
| "logps/chosen": -300.3293151855469, | |
| "logps/rejected": -303.07177734375, | |
| "loss": 0.6333, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.42065340280532837, | |
| "rewards/margins": 0.19771243631839752, | |
| "rewards/rejected": -0.6183657646179199, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "grad_norm": 5.65625, | |
| "learning_rate": 4.308093994778068e-06, | |
| "logits/chosen": -2.0555598735809326, | |
| "logits/rejected": -1.9103734493255615, | |
| "logps/chosen": -286.83306884765625, | |
| "logps/rejected": -285.3974609375, | |
| "loss": 0.6152, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.2723875939846039, | |
| "rewards/margins": 0.22743086516857147, | |
| "rewards/rejected": -0.49981847405433655, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "grad_norm": 3.6875, | |
| "learning_rate": 4.4386422976501306e-06, | |
| "logits/chosen": -2.1098897457122803, | |
| "logits/rejected": -1.996603012084961, | |
| "logps/chosen": -339.12225341796875, | |
| "logps/rejected": -342.5606994628906, | |
| "loss": 0.611, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.49436426162719727, | |
| "rewards/margins": 0.24838733673095703, | |
| "rewards/rejected": -0.7427516579627991, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "grad_norm": 4.71875, | |
| "learning_rate": 4.569190600522193e-06, | |
| "logits/chosen": -2.0351061820983887, | |
| "logits/rejected": -1.8878052234649658, | |
| "logps/chosen": -342.15667724609375, | |
| "logps/rejected": -348.20281982421875, | |
| "loss": 0.6434, | |
| "rewards/accuracies": 0.643750011920929, | |
| "rewards/chosen": -0.5289834141731262, | |
| "rewards/margins": 0.1753660887479782, | |
| "rewards/rejected": -0.7043493986129761, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "grad_norm": 3.84375, | |
| "learning_rate": 4.699738903394257e-06, | |
| "logits/chosen": -2.014333963394165, | |
| "logits/rejected": -1.9689722061157227, | |
| "logps/chosen": -274.50213623046875, | |
| "logps/rejected": -278.16351318359375, | |
| "loss": 0.6081, | |
| "rewards/accuracies": 0.706250011920929, | |
| "rewards/chosen": -0.33518165349960327, | |
| "rewards/margins": 0.24500660598278046, | |
| "rewards/rejected": -0.5801882743835449, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "grad_norm": 5.09375, | |
| "learning_rate": 4.8302872062663196e-06, | |
| "logits/chosen": -2.1054439544677734, | |
| "logits/rejected": -1.9295707941055298, | |
| "logps/chosen": -315.6613464355469, | |
| "logps/rejected": -300.81231689453125, | |
| "loss": 0.5976, | |
| "rewards/accuracies": 0.71875, | |
| "rewards/chosen": -0.38765162229537964, | |
| "rewards/margins": 0.2898003160953522, | |
| "rewards/rejected": -0.6774519681930542, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "grad_norm": 6.375, | |
| "learning_rate": 4.9608355091383814e-06, | |
| "logits/chosen": -2.0662331581115723, | |
| "logits/rejected": -1.8568542003631592, | |
| "logps/chosen": -335.3840637207031, | |
| "logps/rejected": -334.6043395996094, | |
| "loss": 0.5885, | |
| "rewards/accuracies": 0.762499988079071, | |
| "rewards/chosen": -0.373274028301239, | |
| "rewards/margins": 0.3260083794593811, | |
| "rewards/rejected": -0.6992824077606201, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "grad_norm": 5.53125, | |
| "learning_rate": 4.9999488562447675e-06, | |
| "logits/chosen": -2.0750114917755127, | |
| "logits/rejected": -1.9580342769622803, | |
| "logps/chosen": -320.772705078125, | |
| "logps/rejected": -333.44476318359375, | |
| "loss": 0.5855, | |
| "rewards/accuracies": 0.71875, | |
| "rewards/chosen": -0.39726486802101135, | |
| "rewards/margins": 0.3268759846687317, | |
| "rewards/rejected": -0.7241408228874207, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "grad_norm": 5.3125, | |
| "learning_rate": 4.999698361256577e-06, | |
| "logits/chosen": -2.0969738960266113, | |
| "logits/rejected": -1.8604263067245483, | |
| "logps/chosen": -311.9226989746094, | |
| "logps/rejected": -294.60662841796875, | |
| "loss": 0.6218, | |
| "rewards/accuracies": 0.6937500238418579, | |
| "rewards/chosen": -0.4644620418548584, | |
| "rewards/margins": 0.23335090279579163, | |
| "rewards/rejected": -0.6978129148483276, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "eval_logits/chosen": -1.9431427717208862, | |
| "eval_logits/rejected": -1.8155378103256226, | |
| "eval_logps/chosen": -326.5406799316406, | |
| "eval_logps/rejected": -340.4455261230469, | |
| "eval_loss": 0.5939911007881165, | |
| "eval_rewards/accuracies": 0.6809999942779541, | |
| "eval_rewards/chosen": -0.6188870072364807, | |
| "eval_rewards/margins": 0.3394821286201477, | |
| "eval_rewards/rejected": -0.9583691358566284, | |
| "eval_runtime": 385.2303, | |
| "eval_samples_per_second": 5.192, | |
| "eval_steps_per_second": 0.649, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "grad_norm": 5.59375, | |
| "learning_rate": 4.999239142174581e-06, | |
| "logits/chosen": -1.9562289714813232, | |
| "logits/rejected": -1.8964239358901978, | |
| "logps/chosen": -315.13616943359375, | |
| "logps/rejected": -334.50677490234375, | |
| "loss": 0.6431, | |
| "rewards/accuracies": 0.643750011920929, | |
| "rewards/chosen": -0.7281379699707031, | |
| "rewards/margins": 0.2114681750535965, | |
| "rewards/rejected": -0.9396060705184937, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "grad_norm": 6.28125, | |
| "learning_rate": 4.99857123734344e-06, | |
| "logits/chosen": -1.9491183757781982, | |
| "logits/rejected": -1.8290717601776123, | |
| "logps/chosen": -280.4700622558594, | |
| "logps/rejected": -309.1809997558594, | |
| "loss": 0.5735, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.5167919397354126, | |
| "rewards/margins": 0.3777889609336853, | |
| "rewards/rejected": -0.8945809602737427, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "grad_norm": 9.5, | |
| "learning_rate": 4.997694702533016e-06, | |
| "logits/chosen": -1.9259364604949951, | |
| "logits/rejected": -1.8644450902938843, | |
| "logps/chosen": -345.35797119140625, | |
| "logps/rejected": -365.54449462890625, | |
| "loss": 0.5722, | |
| "rewards/accuracies": 0.71875, | |
| "rewards/chosen": -0.6780990362167358, | |
| "rewards/margins": 0.4380587637424469, | |
| "rewards/rejected": -1.1161577701568604, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "grad_norm": 7.59375, | |
| "learning_rate": 4.996609610933713e-06, | |
| "logits/chosen": -2.0121302604675293, | |
| "logits/rejected": -1.9294341802597046, | |
| "logps/chosen": -349.0380554199219, | |
| "logps/rejected": -362.43768310546875, | |
| "loss": 0.5912, | |
| "rewards/accuracies": 0.668749988079071, | |
| "rewards/chosen": -0.7688915729522705, | |
| "rewards/margins": 0.4175523817539215, | |
| "rewards/rejected": -1.1864439249038696, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "grad_norm": 5.46875, | |
| "learning_rate": 4.995316053150366e-06, | |
| "logits/chosen": -1.889850378036499, | |
| "logits/rejected": -1.7697973251342773, | |
| "logps/chosen": -332.23077392578125, | |
| "logps/rejected": -353.26593017578125, | |
| "loss": 0.5642, | |
| "rewards/accuracies": 0.768750011920929, | |
| "rewards/chosen": -0.643971860408783, | |
| "rewards/margins": 0.4406636357307434, | |
| "rewards/rejected": -1.0846354961395264, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "grad_norm": 9.875, | |
| "learning_rate": 4.9938141371946815e-06, | |
| "logits/chosen": -1.8695566654205322, | |
| "logits/rejected": -1.7812881469726562, | |
| "logps/chosen": -366.63818359375, | |
| "logps/rejected": -409.49755859375, | |
| "loss": 0.5388, | |
| "rewards/accuracies": 0.706250011920929, | |
| "rewards/chosen": -1.0153841972351074, | |
| "rewards/margins": 0.5738715529441833, | |
| "rewards/rejected": -1.589255690574646, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "grad_norm": 7.46875, | |
| "learning_rate": 4.992103988476206e-06, | |
| "logits/chosen": -1.8687667846679688, | |
| "logits/rejected": -1.7270047664642334, | |
| "logps/chosen": -376.8227844238281, | |
| "logps/rejected": -413.8404846191406, | |
| "loss": 0.5719, | |
| "rewards/accuracies": 0.706250011920929, | |
| "rewards/chosen": -1.3300559520721436, | |
| "rewards/margins": 0.49565353989601135, | |
| "rewards/rejected": -1.825709581375122, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "grad_norm": 6.0625, | |
| "learning_rate": 4.990185749791866e-06, | |
| "logits/chosen": -1.8790470361709595, | |
| "logits/rejected": -1.7465674877166748, | |
| "logps/chosen": -361.17974853515625, | |
| "logps/rejected": -419.521484375, | |
| "loss": 0.5472, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -1.0430080890655518, | |
| "rewards/margins": 0.5655397176742554, | |
| "rewards/rejected": -1.6085479259490967, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "grad_norm": 6.53125, | |
| "learning_rate": 4.9880595813140395e-06, | |
| "logits/chosen": -1.923179268836975, | |
| "logits/rejected": -1.7839629650115967, | |
| "logps/chosen": -394.8546142578125, | |
| "logps/rejected": -421.29730224609375, | |
| "loss": 0.5317, | |
| "rewards/accuracies": 0.762499988079071, | |
| "rewards/chosen": -1.0996313095092773, | |
| "rewards/margins": 0.5679025053977966, | |
| "rewards/rejected": -1.6675338745117188, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "grad_norm": 6.25, | |
| "learning_rate": 4.985725660577184e-06, | |
| "logits/chosen": -1.887112021446228, | |
| "logits/rejected": -1.7504981756210327, | |
| "logps/chosen": -411.74951171875, | |
| "logps/rejected": -424.2745666503906, | |
| "loss": 0.5674, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -1.3798956871032715, | |
| "rewards/margins": 0.5375889539718628, | |
| "rewards/rejected": -1.9174845218658447, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "eval_logits/chosen": -1.7892649173736572, | |
| "eval_logits/rejected": -1.6636674404144287, | |
| "eval_logps/chosen": -421.9456787109375, | |
| "eval_logps/rejected": -449.8769836425781, | |
| "eval_loss": 0.5779695510864258, | |
| "eval_rewards/accuracies": 0.7039999961853027, | |
| "eval_rewards/chosen": -1.572936773300171, | |
| "eval_rewards/margins": 0.4797472655773163, | |
| "eval_rewards/rejected": -2.0526838302612305, | |
| "eval_runtime": 385.3091, | |
| "eval_samples_per_second": 5.191, | |
| "eval_steps_per_second": 0.649, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "grad_norm": 5.40625, | |
| "learning_rate": 4.983184182463009e-06, | |
| "logits/chosen": -1.853735327720642, | |
| "logits/rejected": -1.7524950504302979, | |
| "logps/chosen": -404.90545654296875, | |
| "logps/rejected": -425.74676513671875, | |
| "loss": 0.5607, | |
| "rewards/accuracies": 0.7437499761581421, | |
| "rewards/chosen": -1.266443133354187, | |
| "rewards/margins": 0.5788331031799316, | |
| "rewards/rejected": -1.8452762365341187, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "grad_norm": 7.3125, | |
| "learning_rate": 4.980435359184203e-06, | |
| "logits/chosen": -1.9005975723266602, | |
| "logits/rejected": -1.8376613855361938, | |
| "logps/chosen": -341.048828125, | |
| "logps/rejected": -359.40496826171875, | |
| "loss": 0.6122, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -0.6798708438873291, | |
| "rewards/margins": 0.3456707298755646, | |
| "rewards/rejected": -1.0255415439605713, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "grad_norm": 10.875, | |
| "learning_rate": 4.9774794202667236e-06, | |
| "logits/chosen": -1.8874883651733398, | |
| "logits/rejected": -1.8308721780776978, | |
| "logps/chosen": -315.84173583984375, | |
| "logps/rejected": -365.2502746582031, | |
| "loss": 0.5734, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -0.5474014282226562, | |
| "rewards/margins": 0.40957459807395935, | |
| "rewards/rejected": -0.9569761157035828, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "grad_norm": 8.5, | |
| "learning_rate": 4.974316612530615e-06, | |
| "logits/chosen": -1.8144280910491943, | |
| "logits/rejected": -1.657810926437378, | |
| "logps/chosen": -369.9844665527344, | |
| "logps/rejected": -390.8047180175781, | |
| "loss": 0.5011, | |
| "rewards/accuracies": 0.8187500238418579, | |
| "rewards/chosen": -0.9150163531303406, | |
| "rewards/margins": 0.676045298576355, | |
| "rewards/rejected": -1.5910617113113403, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "grad_norm": 12.625, | |
| "learning_rate": 4.970947200069416e-06, | |
| "logits/chosen": -1.7606821060180664, | |
| "logits/rejected": -1.7015259265899658, | |
| "logps/chosen": -427.96990966796875, | |
| "logps/rejected": -451.92205810546875, | |
| "loss": 0.6311, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -1.494568943977356, | |
| "rewards/margins": 0.4373590350151062, | |
| "rewards/rejected": -1.931928038597107, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "grad_norm": 7.78125, | |
| "learning_rate": 4.967371464228096e-06, | |
| "logits/chosen": -1.9176502227783203, | |
| "logits/rejected": -1.832397699356079, | |
| "logps/chosen": -372.6578369140625, | |
| "logps/rejected": -429.7704162597656, | |
| "loss": 0.5482, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -1.1515864133834839, | |
| "rewards/margins": 0.567107081413269, | |
| "rewards/rejected": -1.718693494796753, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "grad_norm": 5.53125, | |
| "learning_rate": 4.963589703579569e-06, | |
| "logits/chosen": -1.9988332986831665, | |
| "logits/rejected": -1.8672618865966797, | |
| "logps/chosen": -407.62664794921875, | |
| "logps/rejected": -419.98291015625, | |
| "loss": 0.5754, | |
| "rewards/accuracies": 0.6812499761581421, | |
| "rewards/chosen": -1.075867772102356, | |
| "rewards/margins": 0.47032594680786133, | |
| "rewards/rejected": -1.5461935997009277, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "grad_norm": 8.5, | |
| "learning_rate": 4.9596022338997615e-06, | |
| "logits/chosen": -1.9790706634521484, | |
| "logits/rejected": -1.7595863342285156, | |
| "logps/chosen": -397.14752197265625, | |
| "logps/rejected": -413.5733337402344, | |
| "loss": 0.5495, | |
| "rewards/accuracies": 0.706250011920929, | |
| "rewards/chosen": -0.9936078190803528, | |
| "rewards/margins": 0.5750035047531128, | |
| "rewards/rejected": -1.5686112642288208, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "grad_norm": 7.9375, | |
| "learning_rate": 4.955409388141243e-06, | |
| "logits/chosen": -1.8258365392684937, | |
| "logits/rejected": -1.7129390239715576, | |
| "logps/chosen": -363.6575622558594, | |
| "logps/rejected": -387.19378662109375, | |
| "loss": 0.6003, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -1.0635744333267212, | |
| "rewards/margins": 0.4720209240913391, | |
| "rewards/rejected": -1.5355952978134155, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "grad_norm": 4.84375, | |
| "learning_rate": 4.951011516405429e-06, | |
| "logits/chosen": -1.8798444271087646, | |
| "logits/rejected": -1.8100011348724365, | |
| "logps/chosen": -338.61151123046875, | |
| "logps/rejected": -374.54974365234375, | |
| "loss": 0.5632, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -0.8785476684570312, | |
| "rewards/margins": 0.5087668895721436, | |
| "rewards/rejected": -1.3873146772384644, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "eval_logits/chosen": -1.778578281402588, | |
| "eval_logits/rejected": -1.6489102840423584, | |
| "eval_logps/chosen": -342.7493896484375, | |
| "eval_logps/rejected": -372.69134521484375, | |
| "eval_loss": 0.5649436712265015, | |
| "eval_rewards/accuracies": 0.7039999961853027, | |
| "eval_rewards/chosen": -0.7809735536575317, | |
| "eval_rewards/margins": 0.49985405802726746, | |
| "eval_rewards/rejected": -1.2808276414871216, | |
| "eval_runtime": 385.3125, | |
| "eval_samples_per_second": 5.191, | |
| "eval_steps_per_second": 0.649, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "grad_norm": 6.71875, | |
| "learning_rate": 4.946408985913344e-06, | |
| "logits/chosen": -1.8086153268814087, | |
| "logits/rejected": -1.7312501668930054, | |
| "logps/chosen": -321.55279541015625, | |
| "logps/rejected": -367.79229736328125, | |
| "loss": 0.5218, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -0.7568905353546143, | |
| "rewards/margins": 0.6399748921394348, | |
| "rewards/rejected": -1.3968654870986938, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "grad_norm": 16.25, | |
| "learning_rate": 4.941602180974958e-06, | |
| "logits/chosen": -1.833062767982483, | |
| "logits/rejected": -1.5977442264556885, | |
| "logps/chosen": -380.17169189453125, | |
| "logps/rejected": -390.75848388671875, | |
| "loss": 0.524, | |
| "rewards/accuracies": 0.768750011920929, | |
| "rewards/chosen": -0.9234441518783569, | |
| "rewards/margins": 0.6925610303878784, | |
| "rewards/rejected": -1.616005301475525, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "grad_norm": 6.71875, | |
| "learning_rate": 4.936591502957101e-06, | |
| "logits/chosen": -1.813197374343872, | |
| "logits/rejected": -1.6430933475494385, | |
| "logps/chosen": -355.9547424316406, | |
| "logps/rejected": -418.7765197753906, | |
| "loss": 0.5344, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": -1.078249216079712, | |
| "rewards/margins": 0.7311606407165527, | |
| "rewards/rejected": -1.809409737586975, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "grad_norm": 7.0, | |
| "learning_rate": 4.931377370249946e-06, | |
| "logits/chosen": -1.8197021484375, | |
| "logits/rejected": -1.5834531784057617, | |
| "logps/chosen": -435.12738037109375, | |
| "logps/rejected": -468.70501708984375, | |
| "loss": 0.5641, | |
| "rewards/accuracies": 0.6812499761581421, | |
| "rewards/chosen": -1.6980397701263428, | |
| "rewards/margins": 0.593255341053009, | |
| "rewards/rejected": -2.291295289993286, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "grad_norm": 10.0, | |
| "learning_rate": 4.925960218232073e-06, | |
| "logits/chosen": -1.7958835363388062, | |
| "logits/rejected": -1.6748111248016357, | |
| "logps/chosen": -392.5576171875, | |
| "logps/rejected": -455.75811767578125, | |
| "loss": 0.5384, | |
| "rewards/accuracies": 0.731249988079071, | |
| "rewards/chosen": -1.372554898262024, | |
| "rewards/margins": 0.7270306348800659, | |
| "rewards/rejected": -2.09958553314209, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "grad_norm": 8.1875, | |
| "learning_rate": 4.920340499234116e-06, | |
| "logits/chosen": -1.7571017742156982, | |
| "logits/rejected": -1.5184545516967773, | |
| "logps/chosen": -403.0295715332031, | |
| "logps/rejected": -419.2205505371094, | |
| "loss": 0.5787, | |
| "rewards/accuracies": 0.6812499761581421, | |
| "rewards/chosen": -1.3702582120895386, | |
| "rewards/margins": 0.5140202641487122, | |
| "rewards/rejected": -1.884278655052185, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "grad_norm": 6.53125, | |
| "learning_rate": 4.914518682500995e-06, | |
| "logits/chosen": -1.9124584197998047, | |
| "logits/rejected": -1.694361925125122, | |
| "logps/chosen": -436.59747314453125, | |
| "logps/rejected": -460.3738708496094, | |
| "loss": 0.5391, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -1.530548334121704, | |
| "rewards/margins": 0.6449794769287109, | |
| "rewards/rejected": -2.175528049468994, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "grad_norm": 9.5625, | |
| "learning_rate": 4.9084952541527315e-06, | |
| "logits/chosen": -1.7815015316009521, | |
| "logits/rejected": -1.5756428241729736, | |
| "logps/chosen": -448.1412658691406, | |
| "logps/rejected": -469.43603515625, | |
| "loss": 0.5139, | |
| "rewards/accuracies": 0.7437499761581421, | |
| "rewards/chosen": -1.7480707168579102, | |
| "rewards/margins": 0.6839998364448547, | |
| "rewards/rejected": -2.432070255279541, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "grad_norm": 9.3125, | |
| "learning_rate": 4.902270717143858e-06, | |
| "logits/chosen": -1.7120873928070068, | |
| "logits/rejected": -1.6082136631011963, | |
| "logps/chosen": -419.0126037597656, | |
| "logps/rejected": -534.6773681640625, | |
| "loss": 0.4522, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": -1.827803611755371, | |
| "rewards/margins": 1.0398612022399902, | |
| "rewards/rejected": -2.8676648139953613, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "grad_norm": 6.8125, | |
| "learning_rate": 4.895845591221427e-06, | |
| "logits/chosen": -1.676849365234375, | |
| "logits/rejected": -1.601438283920288, | |
| "logps/chosen": -455.9642639160156, | |
| "logps/rejected": -528.1475219726562, | |
| "loss": 0.5331, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -2.0265378952026367, | |
| "rewards/margins": 0.7665891647338867, | |
| "rewards/rejected": -2.7931270599365234, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "eval_logits/chosen": -1.5919249057769775, | |
| "eval_logits/rejected": -1.469058632850647, | |
| "eval_logps/chosen": -455.5274963378906, | |
| "eval_logps/rejected": -512.6751098632812, | |
| "eval_loss": 0.560720682144165, | |
| "eval_rewards/accuracies": 0.7059999704360962, | |
| "eval_rewards/chosen": -1.9087554216384888, | |
| "eval_rewards/margins": 0.7719098925590515, | |
| "eval_rewards/rejected": -2.6806650161743164, | |
| "eval_runtime": 385.1228, | |
| "eval_samples_per_second": 5.193, | |
| "eval_steps_per_second": 0.649, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "grad_norm": 9.5, | |
| "learning_rate": 4.8892204128816e-06, | |
| "logits/chosen": -1.7319362163543701, | |
| "logits/rejected": -1.619175672531128, | |
| "logps/chosen": -431.63232421875, | |
| "logps/rejected": -489.86297607421875, | |
| "loss": 0.5277, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -1.6406848430633545, | |
| "rewards/margins": 0.7181671857833862, | |
| "rewards/rejected": -2.358851909637451, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "grad_norm": 7.28125, | |
| "learning_rate": 4.882395735324864e-06, | |
| "logits/chosen": -1.6986335515975952, | |
| "logits/rejected": -1.5594747066497803, | |
| "logps/chosen": -427.96978759765625, | |
| "logps/rejected": -497.16766357421875, | |
| "loss": 0.4996, | |
| "rewards/accuracies": 0.731249988079071, | |
| "rewards/chosen": -1.6188774108886719, | |
| "rewards/margins": 0.8638092875480652, | |
| "rewards/rejected": -2.4826865196228027, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "grad_norm": 7.25, | |
| "learning_rate": 4.87537212840983e-06, | |
| "logits/chosen": -1.6116526126861572, | |
| "logits/rejected": -1.474578619003296, | |
| "logps/chosen": -464.416259765625, | |
| "logps/rejected": -503.581787109375, | |
| "loss": 0.576, | |
| "rewards/accuracies": 0.762499988079071, | |
| "rewards/chosen": -1.981610894203186, | |
| "rewards/margins": 0.6970613598823547, | |
| "rewards/rejected": -2.6786723136901855, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "grad_norm": 9.1875, | |
| "learning_rate": 4.8681501786056545e-06, | |
| "logits/chosen": -1.5888502597808838, | |
| "logits/rejected": -1.4401233196258545, | |
| "logps/chosen": -373.1294860839844, | |
| "logps/rejected": -415.46240234375, | |
| "loss": 0.5066, | |
| "rewards/accuracies": 0.731249988079071, | |
| "rewards/chosen": -1.449134111404419, | |
| "rewards/margins": 0.7457250356674194, | |
| "rewards/rejected": -2.194859027862549, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "grad_norm": 14.75, | |
| "learning_rate": 4.860730488943068e-06, | |
| "logits/chosen": -1.6056511402130127, | |
| "logits/rejected": -1.5784225463867188, | |
| "logps/chosen": -356.6183166503906, | |
| "logps/rejected": -429.750732421875, | |
| "loss": 0.5024, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -1.2209298610687256, | |
| "rewards/margins": 0.7504220008850098, | |
| "rewards/rejected": -1.971351981163025, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "grad_norm": 6.96875, | |
| "learning_rate": 4.853113678964022e-06, | |
| "logits/chosen": -1.6386051177978516, | |
| "logits/rejected": -1.5690464973449707, | |
| "logps/chosen": -394.1507568359375, | |
| "logps/rejected": -469.383056640625, | |
| "loss": 0.4908, | |
| "rewards/accuracies": 0.7562500238418579, | |
| "rewards/chosen": -1.2175710201263428, | |
| "rewards/margins": 0.8312736749649048, | |
| "rewards/rejected": -2.048844575881958, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "grad_norm": 15.75, | |
| "learning_rate": 4.845300384669958e-06, | |
| "logits/chosen": -1.6991758346557617, | |
| "logits/rejected": -1.563987374305725, | |
| "logps/chosen": -405.8094482421875, | |
| "logps/rejected": -445.58209228515625, | |
| "loss": 0.5794, | |
| "rewards/accuracies": 0.71875, | |
| "rewards/chosen": -1.5260562896728516, | |
| "rewards/margins": 0.6259430050849915, | |
| "rewards/rejected": -2.1519992351531982, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "grad_norm": 8.9375, | |
| "learning_rate": 4.837291258468701e-06, | |
| "logits/chosen": -1.7494251728057861, | |
| "logits/rejected": -1.6077022552490234, | |
| "logps/chosen": -431.01519775390625, | |
| "logps/rejected": -486.4640197753906, | |
| "loss": 0.5468, | |
| "rewards/accuracies": 0.737500011920929, | |
| "rewards/chosen": -1.4566891193389893, | |
| "rewards/margins": 0.8033839464187622, | |
| "rewards/rejected": -2.260073184967041, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "grad_norm": 6.78125, | |
| "learning_rate": 4.829086969119984e-06, | |
| "logits/chosen": -1.613250732421875, | |
| "logits/rejected": -1.5955699682235718, | |
| "logps/chosen": -397.90008544921875, | |
| "logps/rejected": -463.9117126464844, | |
| "loss": 0.6001, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -1.4717532396316528, | |
| "rewards/margins": 0.64255690574646, | |
| "rewards/rejected": -2.1143100261688232, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "grad_norm": 7.5, | |
| "learning_rate": 4.820688201679605e-06, | |
| "logits/chosen": -1.8398478031158447, | |
| "logits/rejected": -1.5474001169204712, | |
| "logps/chosen": -399.21368408203125, | |
| "logps/rejected": -416.2703552246094, | |
| "loss": 0.4996, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -1.3657824993133545, | |
| "rewards/margins": 0.7942038774490356, | |
| "rewards/rejected": -2.1599864959716797, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "eval_logits/chosen": -1.6709563732147217, | |
| "eval_logits/rejected": -1.546115756034851, | |
| "eval_logps/chosen": -409.65435791015625, | |
| "eval_logps/rejected": -460.5684814453125, | |
| "eval_loss": 0.543312132358551, | |
| "eval_rewards/accuracies": 0.7070000171661377, | |
| "eval_rewards/chosen": -1.4500234127044678, | |
| "eval_rewards/margins": 0.7095751166343689, | |
| "eval_rewards/rejected": -2.1595985889434814, | |
| "eval_runtime": 385.2124, | |
| "eval_samples_per_second": 5.192, | |
| "eval_steps_per_second": 0.649, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "grad_norm": 7.34375, | |
| "learning_rate": 4.8120956574422315e-06, | |
| "logits/chosen": -1.7810264825820923, | |
| "logits/rejected": -1.7489475011825562, | |
| "logps/chosen": -431.69219970703125, | |
| "logps/rejected": -477.871337890625, | |
| "loss": 0.6275, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -1.5776736736297607, | |
| "rewards/margins": 0.5325725674629211, | |
| "rewards/rejected": -2.110246181488037, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "grad_norm": 13.625, | |
| "learning_rate": 4.803310053882831e-06, | |
| "logits/chosen": -1.7703052759170532, | |
| "logits/rejected": -1.7803173065185547, | |
| "logps/chosen": -363.9437561035156, | |
| "logps/rejected": -435.0057678222656, | |
| "loss": 0.5542, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -1.3461793661117554, | |
| "rewards/margins": 0.5871396064758301, | |
| "rewards/rejected": -1.933318853378296, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "grad_norm": 6.875, | |
| "learning_rate": 4.794332124596775e-06, | |
| "logits/chosen": -1.8022472858428955, | |
| "logits/rejected": -1.6746841669082642, | |
| "logps/chosen": -397.36090087890625, | |
| "logps/rejected": -445.603759765625, | |
| "loss": 0.5885, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -1.2891987562179565, | |
| "rewards/margins": 0.5191696882247925, | |
| "rewards/rejected": -1.808368444442749, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "grad_norm": 9.375, | |
| "learning_rate": 4.785162619238575e-06, | |
| "logits/chosen": -1.7888991832733154, | |
| "logits/rejected": -1.6187770366668701, | |
| "logps/chosen": -355.0903015136719, | |
| "logps/rejected": -387.1643981933594, | |
| "loss": 0.5416, | |
| "rewards/accuracies": 0.6812499761581421, | |
| "rewards/chosen": -1.012415885925293, | |
| "rewards/margins": 0.5939286947250366, | |
| "rewards/rejected": -1.6063445806503296, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "grad_norm": 6.78125, | |
| "learning_rate": 4.775802303459288e-06, | |
| "logits/chosen": -1.7059911489486694, | |
| "logits/rejected": -1.6270997524261475, | |
| "logps/chosen": -346.2181091308594, | |
| "logps/rejected": -401.40069580078125, | |
| "loss": 0.5465, | |
| "rewards/accuracies": 0.6937500238418579, | |
| "rewards/chosen": -0.9845376014709473, | |
| "rewards/margins": 0.5990740656852722, | |
| "rewards/rejected": -1.5836117267608643, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "grad_norm": 11.125, | |
| "learning_rate": 4.766251958842589e-06, | |
| "logits/chosen": -1.676922082901001, | |
| "logits/rejected": -1.5429388284683228, | |
| "logps/chosen": -394.45416259765625, | |
| "logps/rejected": -433.03369140625, | |
| "loss": 0.5815, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": -1.219531774520874, | |
| "rewards/margins": 0.49049144983291626, | |
| "rewards/rejected": -1.710023283958435, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "grad_norm": 6.78125, | |
| "learning_rate": 4.7565123828395066e-06, | |
| "logits/chosen": -1.5784261226654053, | |
| "logits/rejected": -1.5068719387054443, | |
| "logps/chosen": -391.16192626953125, | |
| "logps/rejected": -455.4800720214844, | |
| "loss": 0.531, | |
| "rewards/accuracies": 0.731249988079071, | |
| "rewards/chosen": -1.3523309230804443, | |
| "rewards/margins": 0.6492956280708313, | |
| "rewards/rejected": -2.001626491546631, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "grad_norm": 6.65625, | |
| "learning_rate": 4.746584388701831e-06, | |
| "logits/chosen": -1.6509666442871094, | |
| "logits/rejected": -1.5814907550811768, | |
| "logps/chosen": -408.57598876953125, | |
| "logps/rejected": -468.8497619628906, | |
| "loss": 0.5239, | |
| "rewards/accuracies": 0.793749988079071, | |
| "rewards/chosen": -1.4533666372299194, | |
| "rewards/margins": 0.7448235750198364, | |
| "rewards/rejected": -2.198190212249756, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "grad_norm": 9.5625, | |
| "learning_rate": 4.736468805414218e-06, | |
| "logits/chosen": -1.6324241161346436, | |
| "logits/rejected": -1.6051101684570312, | |
| "logps/chosen": -362.0763244628906, | |
| "logps/rejected": -444.11077880859375, | |
| "loss": 0.5667, | |
| "rewards/accuracies": 0.668749988079071, | |
| "rewards/chosen": -1.1464101076126099, | |
| "rewards/margins": 0.6818080544471741, | |
| "rewards/rejected": -1.8282181024551392, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "grad_norm": 12.125, | |
| "learning_rate": 4.7261664776249595e-06, | |
| "logits/chosen": -1.5433322191238403, | |
| "logits/rejected": -1.4583094120025635, | |
| "logps/chosen": -336.41778564453125, | |
| "logps/rejected": -401.85772705078125, | |
| "loss": 0.514, | |
| "rewards/accuracies": 0.7437499761581421, | |
| "rewards/chosen": -1.0750483274459839, | |
| "rewards/margins": 0.7443105578422546, | |
| "rewards/rejected": -1.8193588256835938, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "eval_logits/chosen": -1.621368169784546, | |
| "eval_logits/rejected": -1.5014086961746216, | |
| "eval_logps/chosen": -391.2229919433594, | |
| "eval_logps/rejected": -436.3040771484375, | |
| "eval_loss": 0.5440120697021484, | |
| "eval_rewards/accuracies": 0.718999981880188, | |
| "eval_rewards/chosen": -1.2657097578048706, | |
| "eval_rewards/margins": 0.6512450575828552, | |
| "eval_rewards/rejected": -1.916954755783081, | |
| "eval_runtime": 385.3527, | |
| "eval_samples_per_second": 5.19, | |
| "eval_steps_per_second": 0.649, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "grad_norm": 7.8125, | |
| "learning_rate": 4.715678265575463e-06, | |
| "logits/chosen": -1.7400833368301392, | |
| "logits/rejected": -1.5401082038879395, | |
| "logps/chosen": -410.2032775878906, | |
| "logps/rejected": -411.843994140625, | |
| "loss": 0.556, | |
| "rewards/accuracies": 0.7437499761581421, | |
| "rewards/chosen": -1.2593110799789429, | |
| "rewards/margins": 0.5718873739242554, | |
| "rewards/rejected": -1.8311984539031982, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "grad_norm": 9.3125, | |
| "learning_rate": 4.705005045028415e-06, | |
| "logits/chosen": -1.6306053400039673, | |
| "logits/rejected": -1.5210235118865967, | |
| "logps/chosen": -400.1542053222656, | |
| "logps/rejected": -448.408447265625, | |
| "loss": 0.5563, | |
| "rewards/accuracies": 0.737500011920929, | |
| "rewards/chosen": -1.370181679725647, | |
| "rewards/margins": 0.6565110087394714, | |
| "rewards/rejected": -2.0266928672790527, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "grad_norm": 10.4375, | |
| "learning_rate": 4.694147707194659e-06, | |
| "logits/chosen": -1.6995433568954468, | |
| "logits/rejected": -1.6389293670654297, | |
| "logps/chosen": -427.10137939453125, | |
| "logps/rejected": -471.07952880859375, | |
| "loss": 0.5469, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": -1.5522325038909912, | |
| "rewards/margins": 0.6380002498626709, | |
| "rewards/rejected": -2.190232753753662, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "grad_norm": 6.65625, | |
| "learning_rate": 4.683107158658782e-06, | |
| "logits/chosen": -1.6130173206329346, | |
| "logits/rejected": -1.5491468906402588, | |
| "logps/chosen": -439.54248046875, | |
| "logps/rejected": -480.834228515625, | |
| "loss": 0.512, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -1.4930182695388794, | |
| "rewards/margins": 0.7003245949745178, | |
| "rewards/rejected": -2.193342924118042, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "grad_norm": 11.3125, | |
| "learning_rate": 4.671884321303407e-06, | |
| "logits/chosen": -1.6797221899032593, | |
| "logits/rejected": -1.5230547189712524, | |
| "logps/chosen": -394.5656433105469, | |
| "logps/rejected": -453.25946044921875, | |
| "loss": 0.5134, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -1.4823963642120361, | |
| "rewards/margins": 0.7305151224136353, | |
| "rewards/rejected": -2.212911605834961, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "grad_norm": 7.9375, | |
| "learning_rate": 4.660480132232224e-06, | |
| "logits/chosen": -1.7173080444335938, | |
| "logits/rejected": -1.60665762424469, | |
| "logps/chosen": -406.39117431640625, | |
| "logps/rejected": -445.9922790527344, | |
| "loss": 0.5666, | |
| "rewards/accuracies": 0.737500011920929, | |
| "rewards/chosen": -1.3457807302474976, | |
| "rewards/margins": 0.637101411819458, | |
| "rewards/rejected": -1.9828822612762451, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "grad_norm": 8.5625, | |
| "learning_rate": 4.6488955436917414e-06, | |
| "logits/chosen": -1.7457382678985596, | |
| "logits/rejected": -1.5430558919906616, | |
| "logps/chosen": -429.39300537109375, | |
| "logps/rejected": -465.61224365234375, | |
| "loss": 0.5461, | |
| "rewards/accuracies": 0.706250011920929, | |
| "rewards/chosen": -1.49411940574646, | |
| "rewards/margins": 0.8279851078987122, | |
| "rewards/rejected": -2.3221046924591064, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "grad_norm": 7.03125, | |
| "learning_rate": 4.6371315229917644e-06, | |
| "logits/chosen": -1.7286710739135742, | |
| "logits/rejected": -1.5955041646957397, | |
| "logps/chosen": -443.83270263671875, | |
| "logps/rejected": -498.59967041015625, | |
| "loss": 0.5188, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -1.6041603088378906, | |
| "rewards/margins": 0.7516692876815796, | |
| "rewards/rejected": -2.3558297157287598, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "grad_norm": 10.625, | |
| "learning_rate": 4.625189052424638e-06, | |
| "logits/chosen": -1.6606595516204834, | |
| "logits/rejected": -1.5426713228225708, | |
| "logps/chosen": -412.0262145996094, | |
| "logps/rejected": -478.1866149902344, | |
| "loss": 0.4696, | |
| "rewards/accuracies": 0.731249988079071, | |
| "rewards/chosen": -1.7292293310165405, | |
| "rewards/margins": 0.888912558555603, | |
| "rewards/rejected": -2.6181421279907227, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "grad_norm": 7.46875, | |
| "learning_rate": 4.613069129183218e-06, | |
| "logits/chosen": -1.7503217458724976, | |
| "logits/rejected": -1.6148483753204346, | |
| "logps/chosen": -452.8263244628906, | |
| "logps/rejected": -481.222900390625, | |
| "loss": 0.5468, | |
| "rewards/accuracies": 0.71875, | |
| "rewards/chosen": -1.4942306280136108, | |
| "rewards/margins": 0.6457923054695129, | |
| "rewards/rejected": -2.1400229930877686, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "eval_logits/chosen": -1.5656111240386963, | |
| "eval_logits/rejected": -1.4448813199996948, | |
| "eval_logps/chosen": -401.67669677734375, | |
| "eval_logps/rejected": -451.64080810546875, | |
| "eval_loss": 0.5418093204498291, | |
| "eval_rewards/accuracies": 0.7174999713897705, | |
| "eval_rewards/chosen": -1.3702467679977417, | |
| "eval_rewards/margins": 0.7000752091407776, | |
| "eval_rewards/rejected": -2.070322036743164, | |
| "eval_runtime": 385.2164, | |
| "eval_samples_per_second": 5.192, | |
| "eval_steps_per_second": 0.649, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "grad_norm": 7.8125, | |
| "learning_rate": 4.600772765277607e-06, | |
| "logits/chosen": -1.531764268875122, | |
| "logits/rejected": -1.4728986024856567, | |
| "logps/chosen": -375.1974792480469, | |
| "logps/rejected": -444.0108337402344, | |
| "loss": 0.5138, | |
| "rewards/accuracies": 0.7749999761581421, | |
| "rewards/chosen": -1.4164024591445923, | |
| "rewards/margins": 0.7396863698959351, | |
| "rewards/rejected": -2.1560888290405273, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "grad_norm": 16.25, | |
| "learning_rate": 4.588300987450652e-06, | |
| "logits/chosen": -1.6515556573867798, | |
| "logits/rejected": -1.547123670578003, | |
| "logps/chosen": -394.8990173339844, | |
| "logps/rejected": -431.17645263671875, | |
| "loss": 0.5418, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -1.4115506410598755, | |
| "rewards/margins": 0.6983591318130493, | |
| "rewards/rejected": -2.1099095344543457, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "grad_norm": 6.875, | |
| "learning_rate": 4.5756548370922136e-06, | |
| "logits/chosen": -1.6495920419692993, | |
| "logits/rejected": -1.5659213066101074, | |
| "logps/chosen": -351.00146484375, | |
| "logps/rejected": -412.519287109375, | |
| "loss": 0.5127, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -1.1029760837554932, | |
| "rewards/margins": 0.7049504518508911, | |
| "rewards/rejected": -1.8079265356063843, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "grad_norm": 9.0625, | |
| "learning_rate": 4.562835370152206e-06, | |
| "logits/chosen": -1.7497320175170898, | |
| "logits/rejected": -1.5089380741119385, | |
| "logps/chosen": -426.8157653808594, | |
| "logps/rejected": -491.87860107421875, | |
| "loss": 0.473, | |
| "rewards/accuracies": 0.7562500238418579, | |
| "rewards/chosen": -1.2558623552322388, | |
| "rewards/margins": 0.9615765810012817, | |
| "rewards/rejected": -2.2174386978149414, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "grad_norm": 7.0625, | |
| "learning_rate": 4.54984365705243e-06, | |
| "logits/chosen": -1.6929643154144287, | |
| "logits/rejected": -1.5880625247955322, | |
| "logps/chosen": -421.701416015625, | |
| "logps/rejected": -518.8970947265625, | |
| "loss": 0.4784, | |
| "rewards/accuracies": 0.737500011920929, | |
| "rewards/chosen": -1.5323131084442139, | |
| "rewards/margins": 1.0377166271209717, | |
| "rewards/rejected": -2.5700297355651855, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "grad_norm": 12.25, | |
| "learning_rate": 4.536680782597191e-06, | |
| "logits/chosen": -1.5793530941009521, | |
| "logits/rejected": -1.503025770187378, | |
| "logps/chosen": -413.30792236328125, | |
| "logps/rejected": -483.18048095703125, | |
| "loss": 0.5921, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -1.7728277444839478, | |
| "rewards/margins": 0.7549096345901489, | |
| "rewards/rejected": -2.5277373790740967, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "grad_norm": 15.3125, | |
| "learning_rate": 4.523347845882718e-06, | |
| "logits/chosen": -1.6937191486358643, | |
| "logits/rejected": -1.5083749294281006, | |
| "logps/chosen": -422.14447021484375, | |
| "logps/rejected": -479.6094665527344, | |
| "loss": 0.4495, | |
| "rewards/accuracies": 0.7875000238418579, | |
| "rewards/chosen": -1.3397438526153564, | |
| "rewards/margins": 1.0674594640731812, | |
| "rewards/rejected": -2.407203197479248, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "grad_norm": 5.375, | |
| "learning_rate": 4.50984596020539e-06, | |
| "logits/chosen": -1.544276475906372, | |
| "logits/rejected": -1.4562034606933594, | |
| "logps/chosen": -403.8301696777344, | |
| "logps/rejected": -444.5962829589844, | |
| "loss": 0.573, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": -1.236783504486084, | |
| "rewards/margins": 0.7301002740859985, | |
| "rewards/rejected": -1.9668840169906616, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "grad_norm": 7.40625, | |
| "learning_rate": 4.4961762529687745e-06, | |
| "logits/chosen": -1.6948843002319336, | |
| "logits/rejected": -1.5669870376586914, | |
| "logps/chosen": -365.44342041015625, | |
| "logps/rejected": -436.5625915527344, | |
| "loss": 0.5044, | |
| "rewards/accuracies": 0.762499988079071, | |
| "rewards/chosen": -1.0504177808761597, | |
| "rewards/margins": 0.8762027621269226, | |
| "rewards/rejected": -1.9266207218170166, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "grad_norm": 9.6875, | |
| "learning_rate": 4.482339865589492e-06, | |
| "logits/chosen": -1.6588748693466187, | |
| "logits/rejected": -1.5048010349273682, | |
| "logps/chosen": -401.0564270019531, | |
| "logps/rejected": -414.84466552734375, | |
| "loss": 0.569, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -1.3476970195770264, | |
| "rewards/margins": 0.5775381922721863, | |
| "rewards/rejected": -1.925235390663147, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "eval_logits/chosen": -1.5524324178695679, | |
| "eval_logits/rejected": -1.427809476852417, | |
| "eval_logps/chosen": -378.61767578125, | |
| "eval_logps/rejected": -430.84136962890625, | |
| "eval_loss": 0.5299040675163269, | |
| "eval_rewards/accuracies": 0.7210000157356262, | |
| "eval_rewards/chosen": -1.1396570205688477, | |
| "eval_rewards/margins": 0.7226706147193909, | |
| "eval_rewards/rejected": -1.8623274564743042, | |
| "eval_runtime": 385.4496, | |
| "eval_samples_per_second": 5.189, | |
| "eval_steps_per_second": 0.649, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "grad_norm": 5.6875, | |
| "learning_rate": 4.468337953401909e-06, | |
| "logits/chosen": -1.661257028579712, | |
| "logits/rejected": -1.5975781679153442, | |
| "logps/chosen": -380.5933837890625, | |
| "logps/rejected": -433.12139892578125, | |
| "loss": 0.5657, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -1.1024227142333984, | |
| "rewards/margins": 0.5738898515701294, | |
| "rewards/rejected": -1.6763126850128174, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "grad_norm": 7.34375, | |
| "learning_rate": 4.45417168556166e-06, | |
| "logits/chosen": -1.5824635028839111, | |
| "logits/rejected": -1.4781149625778198, | |
| "logps/chosen": -340.6497497558594, | |
| "logps/rejected": -407.69293212890625, | |
| "loss": 0.5255, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -0.9949854016304016, | |
| "rewards/margins": 0.6768967509269714, | |
| "rewards/rejected": -1.6718822717666626, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "grad_norm": 10.5625, | |
| "learning_rate": 4.439842244948036e-06, | |
| "logits/chosen": -1.5540910959243774, | |
| "logits/rejected": -1.4291226863861084, | |
| "logps/chosen": -390.7538757324219, | |
| "logps/rejected": -446.49310302734375, | |
| "loss": 0.5752, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": -1.296918511390686, | |
| "rewards/margins": 0.6129493117332458, | |
| "rewards/rejected": -1.9098678827285767, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "grad_norm": 14.125, | |
| "learning_rate": 4.425350828065204e-06, | |
| "logits/chosen": -1.6088273525238037, | |
| "logits/rejected": -1.3946729898452759, | |
| "logps/chosen": -412.3367614746094, | |
| "logps/rejected": -442.0401916503906, | |
| "loss": 0.5089, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": -1.2825069427490234, | |
| "rewards/margins": 0.7757080793380737, | |
| "rewards/rejected": -2.0582151412963867, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "grad_norm": 7.875, | |
| "learning_rate": 4.410698644942303e-06, | |
| "logits/chosen": -1.6174886226654053, | |
| "logits/rejected": -1.4844688177108765, | |
| "logps/chosen": -402.29486083984375, | |
| "logps/rejected": -463.25689697265625, | |
| "loss": 0.4913, | |
| "rewards/accuracies": 0.7875000238418579, | |
| "rewards/chosen": -1.299822211265564, | |
| "rewards/margins": 0.8488262891769409, | |
| "rewards/rejected": -2.148648738861084, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "grad_norm": 11.125, | |
| "learning_rate": 4.395886919032406e-06, | |
| "logits/chosen": -1.4636362791061401, | |
| "logits/rejected": -1.3575894832611084, | |
| "logps/chosen": -405.80010986328125, | |
| "logps/rejected": -456.88641357421875, | |
| "loss": 0.5316, | |
| "rewards/accuracies": 0.762499988079071, | |
| "rewards/chosen": -1.4196369647979736, | |
| "rewards/margins": 0.7757617235183716, | |
| "rewards/rejected": -2.1953988075256348, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "grad_norm": 6.6875, | |
| "learning_rate": 4.380916887110366e-06, | |
| "logits/chosen": -1.6339868307113647, | |
| "logits/rejected": -1.4374290704727173, | |
| "logps/chosen": -406.9070739746094, | |
| "logps/rejected": -451.3981018066406, | |
| "loss": 0.5169, | |
| "rewards/accuracies": 0.737500011920929, | |
| "rewards/chosen": -1.477386474609375, | |
| "rewards/margins": 0.8484745025634766, | |
| "rewards/rejected": -2.3258609771728516, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "grad_norm": 6.84375, | |
| "learning_rate": 4.365789799169539e-06, | |
| "logits/chosen": -1.4347012042999268, | |
| "logits/rejected": -1.4834723472595215, | |
| "logps/chosen": -395.71014404296875, | |
| "logps/rejected": -475.1640625, | |
| "loss": 0.5232, | |
| "rewards/accuracies": 0.7437499761581421, | |
| "rewards/chosen": -1.4909913539886475, | |
| "rewards/margins": 0.7410578727722168, | |
| "rewards/rejected": -2.2320492267608643, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "grad_norm": 6.78125, | |
| "learning_rate": 4.350506918317416e-06, | |
| "logits/chosen": -1.6247329711914062, | |
| "logits/rejected": -1.4631903171539307, | |
| "logps/chosen": -389.4300842285156, | |
| "logps/rejected": -455.31573486328125, | |
| "loss": 0.5133, | |
| "rewards/accuracies": 0.71875, | |
| "rewards/chosen": -1.4183425903320312, | |
| "rewards/margins": 0.7307096719741821, | |
| "rewards/rejected": -2.149052143096924, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "grad_norm": 6.15625, | |
| "learning_rate": 4.335069520670149e-06, | |
| "logits/chosen": -1.4696300029754639, | |
| "logits/rejected": -1.3941162824630737, | |
| "logps/chosen": -352.44671630859375, | |
| "logps/rejected": -424.1249084472656, | |
| "loss": 0.5732, | |
| "rewards/accuracies": 0.6812499761581421, | |
| "rewards/chosen": -1.2273385524749756, | |
| "rewards/margins": 0.6349586248397827, | |
| "rewards/rejected": -1.8622970581054688, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "eval_logits/chosen": -1.4804484844207764, | |
| "eval_logits/rejected": -1.3595802783966064, | |
| "eval_logps/chosen": -375.21826171875, | |
| "eval_logps/rejected": -427.4810485839844, | |
| "eval_loss": 0.5184832811355591, | |
| "eval_rewards/accuracies": 0.7250000238418579, | |
| "eval_rewards/chosen": -1.1056623458862305, | |
| "eval_rewards/margins": 0.7230623364448547, | |
| "eval_rewards/rejected": -1.82872474193573, | |
| "eval_runtime": 385.0476, | |
| "eval_samples_per_second": 5.194, | |
| "eval_steps_per_second": 0.649, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "grad_norm": 9.8125, | |
| "learning_rate": 4.319478895246e-06, | |
| "logits/chosen": -1.5287452936172485, | |
| "logits/rejected": -1.3607852458953857, | |
| "logps/chosen": -350.2371520996094, | |
| "logps/rejected": -398.1286315917969, | |
| "loss": 0.5104, | |
| "rewards/accuracies": 0.7562500238418579, | |
| "rewards/chosen": -1.0312172174453735, | |
| "rewards/margins": 0.7258288264274597, | |
| "rewards/rejected": -1.757046103477478, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "grad_norm": 13.0, | |
| "learning_rate": 4.303736343857704e-06, | |
| "logits/chosen": -1.5342817306518555, | |
| "logits/rejected": -1.4489666223526, | |
| "logps/chosen": -372.7054138183594, | |
| "logps/rejected": -475.97601318359375, | |
| "loss": 0.5008, | |
| "rewards/accuracies": 0.737500011920929, | |
| "rewards/chosen": -1.1559852361679077, | |
| "rewards/margins": 0.9135689735412598, | |
| "rewards/rejected": -2.069554328918457, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "grad_norm": 8.125, | |
| "learning_rate": 4.287843181003772e-06, | |
| "logits/chosen": -1.5427916049957275, | |
| "logits/rejected": -1.3855717182159424, | |
| "logps/chosen": -458.01641845703125, | |
| "logps/rejected": -475.8519592285156, | |
| "loss": 0.5884, | |
| "rewards/accuracies": 0.71875, | |
| "rewards/chosen": -1.6416466236114502, | |
| "rewards/margins": 0.6417607665061951, | |
| "rewards/rejected": -2.283407211303711, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "grad_norm": 6.46875, | |
| "learning_rate": 4.27180073375873e-06, | |
| "logits/chosen": -1.5489182472229004, | |
| "logits/rejected": -1.402178168296814, | |
| "logps/chosen": -442.7936096191406, | |
| "logps/rejected": -477.34515380859375, | |
| "loss": 0.5287, | |
| "rewards/accuracies": 0.7562500238418579, | |
| "rewards/chosen": -1.5542631149291992, | |
| "rewards/margins": 0.7610459327697754, | |
| "rewards/rejected": -2.3153088092803955, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "grad_norm": 4.4375, | |
| "learning_rate": 4.255610341662304e-06, | |
| "logits/chosen": -1.6110093593597412, | |
| "logits/rejected": -1.398992896080017, | |
| "logps/chosen": -380.208740234375, | |
| "logps/rejected": -425.40838623046875, | |
| "loss": 0.5553, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -1.2492074966430664, | |
| "rewards/margins": 0.6511562466621399, | |
| "rewards/rejected": -1.900363564491272, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "grad_norm": 6.84375, | |
| "learning_rate": 4.2392733566075764e-06, | |
| "logits/chosen": -1.59576416015625, | |
| "logits/rejected": -1.4599517583847046, | |
| "logps/chosen": -401.14984130859375, | |
| "logps/rejected": -438.921630859375, | |
| "loss": 0.591, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -1.4514347314834595, | |
| "rewards/margins": 0.5331937670707703, | |
| "rewards/rejected": -1.984628438949585, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "grad_norm": 9.875, | |
| "learning_rate": 4.2227911427280975e-06, | |
| "logits/chosen": -1.5509364604949951, | |
| "logits/rejected": -1.3630738258361816, | |
| "logps/chosen": -384.2834777832031, | |
| "logps/rejected": -420.7542419433594, | |
| "loss": 0.5353, | |
| "rewards/accuracies": 0.6812499761581421, | |
| "rewards/chosen": -1.339779257774353, | |
| "rewards/margins": 0.6931589841842651, | |
| "rewards/rejected": -2.0329384803771973, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "grad_norm": 9.1875, | |
| "learning_rate": 4.206165076283983e-06, | |
| "logits/chosen": -1.5844643115997314, | |
| "logits/rejected": -1.4324209690093994, | |
| "logps/chosen": -375.78973388671875, | |
| "logps/rejected": -440.9784240722656, | |
| "loss": 0.4792, | |
| "rewards/accuracies": 0.768750011920929, | |
| "rewards/chosen": -1.29856538772583, | |
| "rewards/margins": 0.8626803159713745, | |
| "rewards/rejected": -2.161245584487915, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "grad_norm": 10.5625, | |
| "learning_rate": 4.189396545546995e-06, | |
| "logits/chosen": -1.5281752347946167, | |
| "logits/rejected": -1.4283504486083984, | |
| "logps/chosen": -397.5606384277344, | |
| "logps/rejected": -468.21002197265625, | |
| "loss": 0.5202, | |
| "rewards/accuracies": 0.7437499761581421, | |
| "rewards/chosen": -1.4838616847991943, | |
| "rewards/margins": 0.9059172868728638, | |
| "rewards/rejected": -2.3897788524627686, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "grad_norm": 10.9375, | |
| "learning_rate": 4.172486950684627e-06, | |
| "logits/chosen": -1.480257511138916, | |
| "logits/rejected": -1.4012019634246826, | |
| "logps/chosen": -429.61181640625, | |
| "logps/rejected": -510.66522216796875, | |
| "loss": 0.5332, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -1.7596956491470337, | |
| "rewards/margins": 0.8419567942619324, | |
| "rewards/rejected": -2.6016526222229004, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "eval_logits/chosen": -1.30724036693573, | |
| "eval_logits/rejected": -1.1976608037948608, | |
| "eval_logps/chosen": -478.32550048828125, | |
| "eval_logps/rejected": -549.7024536132812, | |
| "eval_loss": 0.5315085053443909, | |
| "eval_rewards/accuracies": 0.7239999771118164, | |
| "eval_rewards/chosen": -2.1367344856262207, | |
| "eval_rewards/margins": 0.9142037630081177, | |
| "eval_rewards/rejected": -3.050938367843628, | |
| "eval_runtime": 385.0593, | |
| "eval_samples_per_second": 5.194, | |
| "eval_steps_per_second": 0.649, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "grad_norm": 12.6875, | |
| "learning_rate": 4.155437703643182e-06, | |
| "logits/chosen": -1.4552199840545654, | |
| "logits/rejected": -1.306873083114624, | |
| "logps/chosen": -439.85382080078125, | |
| "logps/rejected": -500.3904724121094, | |
| "loss": 0.5037, | |
| "rewards/accuracies": 0.7749999761581421, | |
| "rewards/chosen": -1.9311021566390991, | |
| "rewards/margins": 0.8962618708610535, | |
| "rewards/rejected": -2.827363967895508, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "grad_norm": 12.8125, | |
| "learning_rate": 4.138250228029882e-06, | |
| "logits/chosen": -1.482912302017212, | |
| "logits/rejected": -1.403141736984253, | |
| "logps/chosen": -424.140380859375, | |
| "logps/rejected": -514.3765869140625, | |
| "loss": 0.5066, | |
| "rewards/accuracies": 0.7562500238418579, | |
| "rewards/chosen": -1.6720972061157227, | |
| "rewards/margins": 0.8676088452339172, | |
| "rewards/rejected": -2.539705753326416, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "grad_norm": 6.9375, | |
| "learning_rate": 4.120925958993994e-06, | |
| "logits/chosen": -1.4682929515838623, | |
| "logits/rejected": -1.3645504713058472, | |
| "logps/chosen": -376.16033935546875, | |
| "logps/rejected": -447.6339416503906, | |
| "loss": 0.5583, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -1.4225904941558838, | |
| "rewards/margins": 0.7580591440200806, | |
| "rewards/rejected": -2.180649757385254, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "grad_norm": 10.8125, | |
| "learning_rate": 4.103466343106999e-06, | |
| "logits/chosen": -1.5599358081817627, | |
| "logits/rejected": -1.4370046854019165, | |
| "logps/chosen": -424.14849853515625, | |
| "logps/rejected": -472.6615295410156, | |
| "loss": 0.5315, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": -1.5476281642913818, | |
| "rewards/margins": 0.743899941444397, | |
| "rewards/rejected": -2.2915279865264893, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "grad_norm": 8.625, | |
| "learning_rate": 4.085872838241797e-06, | |
| "logits/chosen": -1.464450716972351, | |
| "logits/rejected": -1.3373545408248901, | |
| "logps/chosen": -405.13262939453125, | |
| "logps/rejected": -447.93994140625, | |
| "loss": 0.5899, | |
| "rewards/accuracies": 0.706250011920929, | |
| "rewards/chosen": -1.427685022354126, | |
| "rewards/margins": 0.6289039850234985, | |
| "rewards/rejected": -2.056588649749756, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "grad_norm": 9.75, | |
| "learning_rate": 4.06814691345098e-06, | |
| "logits/chosen": -1.452643871307373, | |
| "logits/rejected": -1.2927871942520142, | |
| "logps/chosen": -378.2747497558594, | |
| "logps/rejected": -437.4178161621094, | |
| "loss": 0.4989, | |
| "rewards/accuracies": 0.7437499761581421, | |
| "rewards/chosen": -1.2272692918777466, | |
| "rewards/margins": 0.8121210336685181, | |
| "rewards/rejected": -2.0393900871276855, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "grad_norm": 10.4375, | |
| "learning_rate": 4.050290048844171e-06, | |
| "logits/chosen": -1.572665810585022, | |
| "logits/rejected": -1.4710958003997803, | |
| "logps/chosen": -398.8462829589844, | |
| "logps/rejected": -468.70794677734375, | |
| "loss": 0.5368, | |
| "rewards/accuracies": 0.6812499761581421, | |
| "rewards/chosen": -1.30232834815979, | |
| "rewards/margins": 0.750015139579773, | |
| "rewards/rejected": -2.0523436069488525, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "grad_norm": 7.46875, | |
| "learning_rate": 4.032303735464422e-06, | |
| "logits/chosen": -1.6318330764770508, | |
| "logits/rejected": -1.4836442470550537, | |
| "logps/chosen": -405.6830749511719, | |
| "logps/rejected": -475.7168884277344, | |
| "loss": 0.4568, | |
| "rewards/accuracies": 0.7749999761581421, | |
| "rewards/chosen": -1.337021827697754, | |
| "rewards/margins": 0.9311949610710144, | |
| "rewards/rejected": -2.268216609954834, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "grad_norm": 9.6875, | |
| "learning_rate": 4.014189475163727e-06, | |
| "logits/chosen": -1.4534022808074951, | |
| "logits/rejected": -1.3461982011795044, | |
| "logps/chosen": -380.7342224121094, | |
| "logps/rejected": -464.63916015625, | |
| "loss": 0.4968, | |
| "rewards/accuracies": 0.7437499761581421, | |
| "rewards/chosen": -1.2607730627059937, | |
| "rewards/margins": 0.9202286005020142, | |
| "rewards/rejected": -2.181001663208008, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "grad_norm": 15.0625, | |
| "learning_rate": 3.995948780477605e-06, | |
| "logits/chosen": -1.5742177963256836, | |
| "logits/rejected": -1.410463809967041, | |
| "logps/chosen": -382.19830322265625, | |
| "logps/rejected": -427.6187438964844, | |
| "loss": 0.5431, | |
| "rewards/accuracies": 0.71875, | |
| "rewards/chosen": -1.1344490051269531, | |
| "rewards/margins": 0.7007244825363159, | |
| "rewards/rejected": -1.8351733684539795, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "eval_logits/chosen": -1.431371808052063, | |
| "eval_logits/rejected": -1.3129903078079224, | |
| "eval_logps/chosen": -390.28460693359375, | |
| "eval_logps/rejected": -454.35223388671875, | |
| "eval_loss": 0.521051287651062, | |
| "eval_rewards/accuracies": 0.7260000109672546, | |
| "eval_rewards/chosen": -1.2563258409500122, | |
| "eval_rewards/margins": 0.841110348701477, | |
| "eval_rewards/rejected": -2.09743595123291, | |
| "eval_runtime": 385.3298, | |
| "eval_samples_per_second": 5.19, | |
| "eval_steps_per_second": 0.649, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "grad_norm": 10.875, | |
| "learning_rate": 3.977583174498816e-06, | |
| "logits/chosen": -1.4515248537063599, | |
| "logits/rejected": -1.3351339101791382, | |
| "logps/chosen": -412.0836486816406, | |
| "logps/rejected": -511.7002868652344, | |
| "loss": 0.3984, | |
| "rewards/accuracies": 0.8125, | |
| "rewards/chosen": -1.4840004444122314, | |
| "rewards/margins": 1.215951681137085, | |
| "rewards/rejected": -2.6999518871307373, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "grad_norm": 10.375, | |
| "learning_rate": 3.959094190750172e-06, | |
| "logits/chosen": -1.4154666662216187, | |
| "logits/rejected": -1.2808506488800049, | |
| "logps/chosen": -463.95367431640625, | |
| "logps/rejected": -530.5446166992188, | |
| "loss": 0.5238, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -1.7879329919815063, | |
| "rewards/margins": 0.9349812269210815, | |
| "rewards/rejected": -2.722913980484009, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "grad_norm": 14.625, | |
| "learning_rate": 3.9404833730564975e-06, | |
| "logits/chosen": -1.3400425910949707, | |
| "logits/rejected": -1.2239243984222412, | |
| "logps/chosen": -414.04168701171875, | |
| "logps/rejected": -493.5077209472656, | |
| "loss": 0.5162, | |
| "rewards/accuracies": 0.737500011920929, | |
| "rewards/chosen": -1.6094110012054443, | |
| "rewards/margins": 0.910406768321991, | |
| "rewards/rejected": -2.51981782913208, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "grad_norm": 13.0, | |
| "learning_rate": 3.921752275415712e-06, | |
| "logits/chosen": -1.4123733043670654, | |
| "logits/rejected": -1.379097580909729, | |
| "logps/chosen": -400.0645751953125, | |
| "logps/rejected": -482.3004455566406, | |
| "loss": 0.455, | |
| "rewards/accuracies": 0.793749988079071, | |
| "rewards/chosen": -1.495286464691162, | |
| "rewards/margins": 1.028262734413147, | |
| "rewards/rejected": -2.5235490798950195, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "grad_norm": 6.40625, | |
| "learning_rate": 3.902902461869079e-06, | |
| "logits/chosen": -1.3998125791549683, | |
| "logits/rejected": -1.2797114849090576, | |
| "logps/chosen": -421.95794677734375, | |
| "logps/rejected": -507.14361572265625, | |
| "loss": 0.5415, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": -1.8244426250457764, | |
| "rewards/margins": 1.0165250301361084, | |
| "rewards/rejected": -2.840967893600464, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "grad_norm": 15.8125, | |
| "learning_rate": 3.883935506370605e-06, | |
| "logits/chosen": -1.4051461219787598, | |
| "logits/rejected": -1.2663236856460571, | |
| "logps/chosen": -432.9169921875, | |
| "logps/rejected": -484.6170349121094, | |
| "loss": 0.5752, | |
| "rewards/accuracies": 0.7437499761581421, | |
| "rewards/chosen": -1.7967593669891357, | |
| "rewards/margins": 0.7953070402145386, | |
| "rewards/rejected": -2.592066526412964, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "grad_norm": 5.71875, | |
| "learning_rate": 3.864852992655617e-06, | |
| "logits/chosen": -1.5188504457473755, | |
| "logits/rejected": -1.4224086999893188, | |
| "logps/chosen": -385.0553283691406, | |
| "logps/rejected": -460.64166259765625, | |
| "loss": 0.4617, | |
| "rewards/accuracies": 0.768750011920929, | |
| "rewards/chosen": -1.364206075668335, | |
| "rewards/margins": 0.8787292242050171, | |
| "rewards/rejected": -2.2429351806640625, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "grad_norm": 6.0, | |
| "learning_rate": 3.845656514108516e-06, | |
| "logits/chosen": -1.4730474948883057, | |
| "logits/rejected": -1.3063628673553467, | |
| "logps/chosen": -420.05364990234375, | |
| "logps/rejected": -448.61663818359375, | |
| "loss": 0.4919, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -1.609230637550354, | |
| "rewards/margins": 0.8354890942573547, | |
| "rewards/rejected": -2.4447197914123535, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "grad_norm": 15.6875, | |
| "learning_rate": 3.826347673629738e-06, | |
| "logits/chosen": -1.447205901145935, | |
| "logits/rejected": -1.2630943059921265, | |
| "logps/chosen": -382.7901916503906, | |
| "logps/rejected": -455.2850036621094, | |
| "loss": 0.4846, | |
| "rewards/accuracies": 0.762499988079071, | |
| "rewards/chosen": -1.2948672771453857, | |
| "rewards/margins": 0.9875162243843079, | |
| "rewards/rejected": -2.282383441925049, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "grad_norm": 13.4375, | |
| "learning_rate": 3.8069280835019062e-06, | |
| "logits/chosen": -1.4306355714797974, | |
| "logits/rejected": -1.2892208099365234, | |
| "logps/chosen": -402.4939880371094, | |
| "logps/rejected": -487.1109313964844, | |
| "loss": 0.4862, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -1.38181734085083, | |
| "rewards/margins": 1.0380725860595703, | |
| "rewards/rejected": -2.4198899269104004, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "eval_logits/chosen": -1.4015111923217773, | |
| "eval_logits/rejected": -1.2794849872589111, | |
| "eval_logps/chosen": -401.4261779785156, | |
| "eval_logps/rejected": -472.0146179199219, | |
| "eval_loss": 0.5161935091018677, | |
| "eval_rewards/accuracies": 0.7354999780654907, | |
| "eval_rewards/chosen": -1.3677420616149902, | |
| "eval_rewards/margins": 0.9063177704811096, | |
| "eval_rewards/rejected": -2.274059534072876, | |
| "eval_runtime": 384.9141, | |
| "eval_samples_per_second": 5.196, | |
| "eval_steps_per_second": 0.649, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "grad_norm": 10.0625, | |
| "learning_rate": 3.7873993652552077e-06, | |
| "logits/chosen": -1.4077152013778687, | |
| "logits/rejected": -1.3199503421783447, | |
| "logps/chosen": -359.19488525390625, | |
| "logps/rejected": -424.65576171875, | |
| "loss": 0.6047, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -1.2548080682754517, | |
| "rewards/margins": 0.7129807472229004, | |
| "rewards/rejected": -1.9677889347076416, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "grad_norm": 8.1875, | |
| "learning_rate": 3.7677631495319953e-06, | |
| "logits/chosen": -1.5366017818450928, | |
| "logits/rejected": -1.420841932296753, | |
| "logps/chosen": -355.3591613769531, | |
| "logps/rejected": -406.9905700683594, | |
| "loss": 0.5263, | |
| "rewards/accuracies": 0.737500011920929, | |
| "rewards/chosen": -0.9167426228523254, | |
| "rewards/margins": 0.6963993310928345, | |
| "rewards/rejected": -1.6131420135498047, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "grad_norm": 6.15625, | |
| "learning_rate": 3.748021075950633e-06, | |
| "logits/chosen": -1.5663089752197266, | |
| "logits/rejected": -1.4497790336608887, | |
| "logps/chosen": -371.51312255859375, | |
| "logps/rejected": -410.3604431152344, | |
| "loss": 0.5946, | |
| "rewards/accuracies": 0.65625, | |
| "rewards/chosen": -0.9686979055404663, | |
| "rewards/margins": 0.49481868743896484, | |
| "rewards/rejected": -1.4635167121887207, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "grad_norm": 9.9375, | |
| "learning_rate": 3.7281747929685824e-06, | |
| "logits/chosen": -1.4247326850891113, | |
| "logits/rejected": -1.265855073928833, | |
| "logps/chosen": -353.05194091796875, | |
| "logps/rejected": -399.3148498535156, | |
| "loss": 0.548, | |
| "rewards/accuracies": 0.731249988079071, | |
| "rewards/chosen": -1.1539690494537354, | |
| "rewards/margins": 0.6195784211158752, | |
| "rewards/rejected": -1.7735474109649658, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "grad_norm": 7.59375, | |
| "learning_rate": 3.7082259577447604e-06, | |
| "logits/chosen": -1.5184131860733032, | |
| "logits/rejected": -1.4079376459121704, | |
| "logps/chosen": -389.82550048828125, | |
| "logps/rejected": -445.1400451660156, | |
| "loss": 0.4885, | |
| "rewards/accuracies": 0.762499988079071, | |
| "rewards/chosen": -1.1863467693328857, | |
| "rewards/margins": 0.7531275749206543, | |
| "rewards/rejected": -1.9394744634628296, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "grad_norm": 8.5625, | |
| "learning_rate": 3.6881762360011688e-06, | |
| "logits/chosen": -1.5098861455917358, | |
| "logits/rejected": -1.317479133605957, | |
| "logps/chosen": -411.7156677246094, | |
| "logps/rejected": -458.67218017578125, | |
| "loss": 0.5111, | |
| "rewards/accuracies": 0.706250011920929, | |
| "rewards/chosen": -1.3021931648254395, | |
| "rewards/margins": 0.8395140767097473, | |
| "rewards/rejected": -2.141706943511963, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "grad_norm": 10.8125, | |
| "learning_rate": 3.668027301883802e-06, | |
| "logits/chosen": -1.4269211292266846, | |
| "logits/rejected": -1.2615479230880737, | |
| "logps/chosen": -402.62603759765625, | |
| "logps/rejected": -482.184326171875, | |
| "loss": 0.511, | |
| "rewards/accuracies": 0.7562500238418579, | |
| "rewards/chosen": -1.5126349925994873, | |
| "rewards/margins": 0.9446732401847839, | |
| "rewards/rejected": -2.457308053970337, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "grad_norm": 6.09375, | |
| "learning_rate": 3.64778083782286e-06, | |
| "logits/chosen": -1.2994117736816406, | |
| "logits/rejected": -1.2819687128067017, | |
| "logps/chosen": -454.22711181640625, | |
| "logps/rejected": -568.9495239257812, | |
| "loss": 0.5489, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": -2.0162034034729004, | |
| "rewards/margins": 0.8572282791137695, | |
| "rewards/rejected": -2.87343168258667, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "grad_norm": 8.125, | |
| "learning_rate": 3.627438534392268e-06, | |
| "logits/chosen": -1.4073131084442139, | |
| "logits/rejected": -1.3753129243850708, | |
| "logps/chosen": -438.55255126953125, | |
| "logps/rejected": -532.357421875, | |
| "loss": 0.4994, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -2.0050759315490723, | |
| "rewards/margins": 0.8708721399307251, | |
| "rewards/rejected": -2.875947952270508, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "grad_norm": 10.5625, | |
| "learning_rate": 3.607002090168506e-06, | |
| "logits/chosen": -1.2787964344024658, | |
| "logits/rejected": -1.2062056064605713, | |
| "logps/chosen": -478.2181091308594, | |
| "logps/rejected": -532.1177978515625, | |
| "loss": 0.5858, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -2.139094829559326, | |
| "rewards/margins": 0.7530891299247742, | |
| "rewards/rejected": -2.892183780670166, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "eval_logits/chosen": -1.2717995643615723, | |
| "eval_logits/rejected": -1.1533604860305786, | |
| "eval_logps/chosen": -445.6515197753906, | |
| "eval_logps/rejected": -514.567138671875, | |
| "eval_loss": 0.5072752833366394, | |
| "eval_rewards/accuracies": 0.7365000247955322, | |
| "eval_rewards/chosen": -1.809995174407959, | |
| "eval_rewards/margins": 0.889590322971344, | |
| "eval_rewards/rejected": -2.6995856761932373, | |
| "eval_runtime": 385.2379, | |
| "eval_samples_per_second": 5.192, | |
| "eval_steps_per_second": 0.649, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "grad_norm": 4.8125, | |
| "learning_rate": 3.586473211588787e-06, | |
| "logits/chosen": -1.3733545541763306, | |
| "logits/rejected": -1.2681185007095337, | |
| "logps/chosen": -407.07623291015625, | |
| "logps/rejected": -509.69683837890625, | |
| "loss": 0.4615, | |
| "rewards/accuracies": 0.7562500238418579, | |
| "rewards/chosen": -1.6238105297088623, | |
| "rewards/margins": 0.9594193696975708, | |
| "rewards/rejected": -2.5832300186157227, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "grad_norm": 11.125, | |
| "learning_rate": 3.5658536128085623e-06, | |
| "logits/chosen": -1.3982038497924805, | |
| "logits/rejected": -1.2271344661712646, | |
| "logps/chosen": -460.24951171875, | |
| "logps/rejected": -503.8080139160156, | |
| "loss": 0.595, | |
| "rewards/accuracies": 0.668749988079071, | |
| "rewards/chosen": -1.9733803272247314, | |
| "rewards/margins": 0.7311316728591919, | |
| "rewards/rejected": -2.704512119293213, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "grad_norm": 8.625, | |
| "learning_rate": 3.545145015558399e-06, | |
| "logits/chosen": -1.1945741176605225, | |
| "logits/rejected": -1.1713488101959229, | |
| "logps/chosen": -412.6747131347656, | |
| "logps/rejected": -492.372802734375, | |
| "loss": 0.5028, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -1.8103736639022827, | |
| "rewards/margins": 0.9391372799873352, | |
| "rewards/rejected": -2.7495107650756836, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "grad_norm": 8.9375, | |
| "learning_rate": 3.5243491490002056e-06, | |
| "logits/chosen": -1.3308615684509277, | |
| "logits/rejected": -1.2446686029434204, | |
| "logps/chosen": -433.484375, | |
| "logps/rejected": -507.3377990722656, | |
| "loss": 0.5688, | |
| "rewards/accuracies": 0.71875, | |
| "rewards/chosen": -1.7979129552841187, | |
| "rewards/margins": 0.7903792262077332, | |
| "rewards/rejected": -2.588292360305786, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "grad_norm": 7.8125, | |
| "learning_rate": 3.503467749582857e-06, | |
| "logits/chosen": -1.378259301185608, | |
| "logits/rejected": -1.1882727146148682, | |
| "logps/chosen": -412.93560791015625, | |
| "logps/rejected": -446.2088317871094, | |
| "loss": 0.5722, | |
| "rewards/accuracies": 0.706250011920929, | |
| "rewards/chosen": -1.6171245574951172, | |
| "rewards/margins": 0.6728037595748901, | |
| "rewards/rejected": -2.2899281978607178, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "grad_norm": 14.125, | |
| "learning_rate": 3.4825025608971947e-06, | |
| "logits/chosen": -1.2760392427444458, | |
| "logits/rejected": -1.2017720937728882, | |
| "logps/chosen": -379.2555847167969, | |
| "logps/rejected": -455.60791015625, | |
| "loss": 0.5323, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -1.573118805885315, | |
| "rewards/margins": 0.7255537509918213, | |
| "rewards/rejected": -2.2986724376678467, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "grad_norm": 7.34375, | |
| "learning_rate": 3.4614553335304407e-06, | |
| "logits/chosen": -1.3151836395263672, | |
| "logits/rejected": -1.113488793373108, | |
| "logps/chosen": -440.9947814941406, | |
| "logps/rejected": -505.5106506347656, | |
| "loss": 0.4714, | |
| "rewards/accuracies": 0.7437499761581421, | |
| "rewards/chosen": -1.7208404541015625, | |
| "rewards/margins": 0.9629707336425781, | |
| "rewards/rejected": -2.6838109493255615, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "grad_norm": 7.28125, | |
| "learning_rate": 3.4403278249200222e-06, | |
| "logits/chosen": -1.289880633354187, | |
| "logits/rejected": -1.0922951698303223, | |
| "logps/chosen": -470.2997131347656, | |
| "logps/rejected": -540.0161743164062, | |
| "loss": 0.4409, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -1.8769254684448242, | |
| "rewards/margins": 1.1163800954818726, | |
| "rewards/rejected": -2.9933059215545654, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "grad_norm": 15.75, | |
| "learning_rate": 3.4191217992068293e-06, | |
| "logits/chosen": -1.3650540113449097, | |
| "logits/rejected": -1.1904373168945312, | |
| "logps/chosen": -491.87060546875, | |
| "logps/rejected": -539.9581298828125, | |
| "loss": 0.5323, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -2.137281656265259, | |
| "rewards/margins": 0.9366267323493958, | |
| "rewards/rejected": -3.0739083290100098, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "grad_norm": 11.8125, | |
| "learning_rate": 3.3978390270879056e-06, | |
| "logits/chosen": -1.273272156715393, | |
| "logits/rejected": -1.1826374530792236, | |
| "logps/chosen": -441.7779235839844, | |
| "logps/rejected": -540.5211791992188, | |
| "loss": 0.5147, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -2.3176229000091553, | |
| "rewards/margins": 0.9195470809936523, | |
| "rewards/rejected": -3.2371699810028076, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "eval_logits/chosen": -1.269109845161438, | |
| "eval_logits/rejected": -1.146828293800354, | |
| "eval_logps/chosen": -491.4620666503906, | |
| "eval_logps/rejected": -566.2828979492188, | |
| "eval_loss": 0.5000255107879639, | |
| "eval_rewards/accuracies": 0.734000027179718, | |
| "eval_rewards/chosen": -2.2681005001068115, | |
| "eval_rewards/margins": 0.9486428499221802, | |
| "eval_rewards/rejected": -3.2167434692382812, | |
| "eval_runtime": 385.0866, | |
| "eval_samples_per_second": 5.194, | |
| "eval_steps_per_second": 0.649, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "grad_norm": 11.6875, | |
| "learning_rate": 3.3764812856685995e-06, | |
| "logits/chosen": -1.3418161869049072, | |
| "logits/rejected": -1.3261712789535522, | |
| "logps/chosen": -440.2900390625, | |
| "logps/rejected": -541.0260009765625, | |
| "loss": 0.5252, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -2.1573386192321777, | |
| "rewards/margins": 0.8722022175788879, | |
| "rewards/rejected": -3.029540777206421, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "grad_norm": 10.875, | |
| "learning_rate": 3.3550503583141726e-06, | |
| "logits/chosen": -1.4454267024993896, | |
| "logits/rejected": -1.311650276184082, | |
| "logps/chosen": -485.03411865234375, | |
| "logps/rejected": -569.6810913085938, | |
| "loss": 0.4849, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": -2.1618409156799316, | |
| "rewards/margins": 0.9978164434432983, | |
| "rewards/rejected": -3.1596572399139404, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "grad_norm": 8.375, | |
| "learning_rate": 3.3335480345008907e-06, | |
| "logits/chosen": -1.2839902639389038, | |
| "logits/rejected": -1.1861859560012817, | |
| "logps/chosen": -466.77850341796875, | |
| "logps/rejected": -553.5386352539062, | |
| "loss": 0.4622, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/chosen": -2.0662953853607178, | |
| "rewards/margins": 1.1144917011260986, | |
| "rewards/rejected": -3.1807870864868164, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "grad_norm": 8.0, | |
| "learning_rate": 3.3119761096666055e-06, | |
| "logits/chosen": -1.3106259107589722, | |
| "logits/rejected": -1.1651959419250488, | |
| "logps/chosen": -498.0738830566406, | |
| "logps/rejected": -552.7239379882812, | |
| "loss": 0.5547, | |
| "rewards/accuracies": 0.71875, | |
| "rewards/chosen": -2.230546236038208, | |
| "rewards/margins": 0.8497620820999146, | |
| "rewards/rejected": -3.080308437347412, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "grad_norm": 7.6875, | |
| "learning_rate": 3.290336385060832e-06, | |
| "logits/chosen": -1.493554949760437, | |
| "logits/rejected": -1.2929532527923584, | |
| "logps/chosen": -479.22259521484375, | |
| "logps/rejected": -548.3055419921875, | |
| "loss": 0.55, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": -2.3036773204803467, | |
| "rewards/margins": 0.8925528526306152, | |
| "rewards/rejected": -3.196229934692383, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "grad_norm": 9.3125, | |
| "learning_rate": 3.268630667594348e-06, | |
| "logits/chosen": -1.355196237564087, | |
| "logits/rejected": -1.3183298110961914, | |
| "logps/chosen": -460.26336669921875, | |
| "logps/rejected": -523.685546875, | |
| "loss": 0.5176, | |
| "rewards/accuracies": 0.706250011920929, | |
| "rewards/chosen": -2.046140670776367, | |
| "rewards/margins": 0.8765950202941895, | |
| "rewards/rejected": -2.9227356910705566, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "grad_norm": 11.125, | |
| "learning_rate": 3.2468607696883147e-06, | |
| "logits/chosen": -1.3625749349594116, | |
| "logits/rejected": -1.311535358428955, | |
| "logps/chosen": -489.18017578125, | |
| "logps/rejected": -587.8863525390625, | |
| "loss": 0.4934, | |
| "rewards/accuracies": 0.71875, | |
| "rewards/chosen": -2.360048294067383, | |
| "rewards/margins": 0.9435898065567017, | |
| "rewards/rejected": -3.303637742996216, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "grad_norm": 7.78125, | |
| "learning_rate": 3.225028509122944e-06, | |
| "logits/chosen": -1.397005319595337, | |
| "logits/rejected": -1.2728253602981567, | |
| "logps/chosen": -486.8643493652344, | |
| "logps/rejected": -561.2532958984375, | |
| "loss": 0.5211, | |
| "rewards/accuracies": 0.762499988079071, | |
| "rewards/chosen": -2.503471851348877, | |
| "rewards/margins": 0.8570526838302612, | |
| "rewards/rejected": -3.3605244159698486, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "grad_norm": 11.6875, | |
| "learning_rate": 3.2031357088857083e-06, | |
| "logits/chosen": -1.3312914371490479, | |
| "logits/rejected": -1.2595702409744263, | |
| "logps/chosen": -561.8858032226562, | |
| "logps/rejected": -660.8182373046875, | |
| "loss": 0.5043, | |
| "rewards/accuracies": 0.7437499761581421, | |
| "rewards/chosen": -2.8668177127838135, | |
| "rewards/margins": 1.0241832733154297, | |
| "rewards/rejected": -3.8910012245178223, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "grad_norm": 12.8125, | |
| "learning_rate": 3.181184197019127e-06, | |
| "logits/chosen": -1.1215088367462158, | |
| "logits/rejected": -1.0118662118911743, | |
| "logps/chosen": -525.9521484375, | |
| "logps/rejected": -697.3963623046875, | |
| "loss": 0.4809, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": -2.957373857498169, | |
| "rewards/margins": 1.4523636102676392, | |
| "rewards/rejected": -4.409738063812256, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "eval_logits/chosen": -1.1786177158355713, | |
| "eval_logits/rejected": -1.0616753101348877, | |
| "eval_logps/chosen": -557.43115234375, | |
| "eval_logps/rejected": -643.640869140625, | |
| "eval_loss": 0.5022104382514954, | |
| "eval_rewards/accuracies": 0.7404999732971191, | |
| "eval_rewards/chosen": -2.9277913570404053, | |
| "eval_rewards/margins": 1.062530517578125, | |
| "eval_rewards/rejected": -3.9903218746185303, | |
| "eval_runtime": 384.8251, | |
| "eval_samples_per_second": 5.197, | |
| "eval_steps_per_second": 0.65, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "grad_norm": 18.125, | |
| "learning_rate": 3.159175806468126e-06, | |
| "logits/chosen": -1.1367595195770264, | |
| "logits/rejected": -0.9490365982055664, | |
| "logps/chosen": -545.4899291992188, | |
| "logps/rejected": -620.2122192382812, | |
| "loss": 0.5001, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -2.9681437015533447, | |
| "rewards/margins": 1.0490918159484863, | |
| "rewards/rejected": -4.01723575592041, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "grad_norm": 13.375, | |
| "learning_rate": 3.1371123749269804e-06, | |
| "logits/chosen": -1.2076561450958252, | |
| "logits/rejected": -1.135667085647583, | |
| "logps/chosen": -596.1036376953125, | |
| "logps/rejected": -664.8118896484375, | |
| "loss": 0.5596, | |
| "rewards/accuracies": 0.7562500238418579, | |
| "rewards/chosen": -3.1270740032196045, | |
| "rewards/margins": 0.847625732421875, | |
| "rewards/rejected": -3.9746997356414795, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "grad_norm": 11.6875, | |
| "learning_rate": 3.114995744685877e-06, | |
| "logits/chosen": -1.1738382577896118, | |
| "logits/rejected": -1.146437644958496, | |
| "logps/chosen": -529.6216430664062, | |
| "logps/rejected": -603.9373168945312, | |
| "loss": 0.5267, | |
| "rewards/accuracies": 0.706250011920929, | |
| "rewards/chosen": -2.8229541778564453, | |
| "rewards/margins": 0.8735902905464172, | |
| "rewards/rejected": -3.696544647216797, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "grad_norm": 6.34375, | |
| "learning_rate": 3.0928277624770743e-06, | |
| "logits/chosen": -1.3653886318206787, | |
| "logits/rejected": -1.2098249197006226, | |
| "logps/chosen": -532.6870727539062, | |
| "logps/rejected": -613.7505493164062, | |
| "loss": 0.5049, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/chosen": -2.4794299602508545, | |
| "rewards/margins": 1.0948512554168701, | |
| "rewards/rejected": -3.5742812156677246, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "grad_norm": 6.625, | |
| "learning_rate": 3.070610279320708e-06, | |
| "logits/chosen": -1.3816752433776855, | |
| "logits/rejected": -1.2150719165802002, | |
| "logps/chosen": -521.9651489257812, | |
| "logps/rejected": -601.0781860351562, | |
| "loss": 0.4669, | |
| "rewards/accuracies": 0.7749999761581421, | |
| "rewards/chosen": -2.3488364219665527, | |
| "rewards/margins": 1.0314748287200928, | |
| "rewards/rejected": -3.3803107738494873, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "grad_norm": 5.71875, | |
| "learning_rate": 3.0483451503702264e-06, | |
| "logits/chosen": -1.3038969039916992, | |
| "logits/rejected": -1.2319445610046387, | |
| "logps/chosen": -547.4259033203125, | |
| "logps/rejected": -617.3253784179688, | |
| "loss": 0.5618, | |
| "rewards/accuracies": 0.6812499761581421, | |
| "rewards/chosen": -2.6133077144622803, | |
| "rewards/margins": 0.8994197845458984, | |
| "rewards/rejected": -3.5127272605895996, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "grad_norm": 8.1875, | |
| "learning_rate": 3.0260342347574916e-06, | |
| "logits/chosen": -1.2965396642684937, | |
| "logits/rejected": -1.1523797512054443, | |
| "logps/chosen": -519.9957275390625, | |
| "logps/rejected": -625.9295654296875, | |
| "loss": 0.4402, | |
| "rewards/accuracies": 0.84375, | |
| "rewards/chosen": -2.4765942096710205, | |
| "rewards/margins": 1.2496535778045654, | |
| "rewards/rejected": -3.726247787475586, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "grad_norm": 9.8125, | |
| "learning_rate": 3.0036793954375358e-06, | |
| "logits/chosen": -1.2782443761825562, | |
| "logits/rejected": -1.1259523630142212, | |
| "logps/chosen": -547.7828979492188, | |
| "logps/rejected": -630.2535400390625, | |
| "loss": 0.4395, | |
| "rewards/accuracies": 0.8062499761581421, | |
| "rewards/chosen": -2.7768635749816895, | |
| "rewards/margins": 1.2470468282699585, | |
| "rewards/rejected": -4.0239105224609375, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "grad_norm": 11.5, | |
| "learning_rate": 2.981282499033009e-06, | |
| "logits/chosen": -1.278181791305542, | |
| "logits/rejected": -1.1554654836654663, | |
| "logps/chosen": -553.5909423828125, | |
| "logps/rejected": -634.082275390625, | |
| "loss": 0.5183, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -2.772510051727295, | |
| "rewards/margins": 1.059287667274475, | |
| "rewards/rejected": -3.8317978382110596, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "grad_norm": 8.0625, | |
| "learning_rate": 2.9588454156783163e-06, | |
| "logits/chosen": -1.327986717224121, | |
| "logits/rejected": -1.165433645248413, | |
| "logps/chosen": -511.99090576171875, | |
| "logps/rejected": -616.3585815429688, | |
| "loss": 0.46, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": -2.3063502311706543, | |
| "rewards/margins": 1.2612559795379639, | |
| "rewards/rejected": -3.5676064491271973, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "eval_logits/chosen": -1.2252681255340576, | |
| "eval_logits/rejected": -1.1040537357330322, | |
| "eval_logps/chosen": -507.9823303222656, | |
| "eval_logps/rejected": -594.7523193359375, | |
| "eval_loss": 0.5002806782722473, | |
| "eval_rewards/accuracies": 0.7354999780654907, | |
| "eval_rewards/chosen": -2.433302879333496, | |
| "eval_rewards/margins": 1.0681343078613281, | |
| "eval_rewards/rejected": -3.501437187194824, | |
| "eval_runtime": 384.8766, | |
| "eval_samples_per_second": 5.196, | |
| "eval_steps_per_second": 0.65, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "grad_norm": 10.0625, | |
| "learning_rate": 2.9363700188634597e-06, | |
| "logits/chosen": -1.2988775968551636, | |
| "logits/rejected": -1.167811632156372, | |
| "logps/chosen": -534.3869018554688, | |
| "logps/rejected": -595.1586303710938, | |
| "loss": 0.518, | |
| "rewards/accuracies": 0.762499988079071, | |
| "rewards/chosen": -2.6931681632995605, | |
| "rewards/margins": 0.9779523611068726, | |
| "rewards/rejected": -3.6711204051971436, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "grad_norm": 11.3125, | |
| "learning_rate": 2.9138581852776053e-06, | |
| "logits/chosen": -1.2570379972457886, | |
| "logits/rejected": -1.1531012058258057, | |
| "logps/chosen": -555.2855834960938, | |
| "logps/rejected": -654.2891845703125, | |
| "loss": 0.508, | |
| "rewards/accuracies": 0.7749999761581421, | |
| "rewards/chosen": -2.955744981765747, | |
| "rewards/margins": 1.1195967197418213, | |
| "rewards/rejected": -4.075342178344727, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "grad_norm": 7.8125, | |
| "learning_rate": 2.8913117946523805e-06, | |
| "logits/chosen": -1.280539631843567, | |
| "logits/rejected": -1.100694179534912, | |
| "logps/chosen": -573.8317260742188, | |
| "logps/rejected": -636.535400390625, | |
| "loss": 0.4979, | |
| "rewards/accuracies": 0.706250011920929, | |
| "rewards/chosen": -3.0741798877716064, | |
| "rewards/margins": 1.0079572200775146, | |
| "rewards/rejected": -4.082137107849121, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "grad_norm": 9.9375, | |
| "learning_rate": 2.8687327296049126e-06, | |
| "logits/chosen": -1.2726280689239502, | |
| "logits/rejected": -1.171382188796997, | |
| "logps/chosen": -556.9118041992188, | |
| "logps/rejected": -646.0772705078125, | |
| "loss": 0.5218, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": -2.977890968322754, | |
| "rewards/margins": 0.9889172315597534, | |
| "rewards/rejected": -3.966808319091797, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "grad_norm": 13.25, | |
| "learning_rate": 2.8461228754806376e-06, | |
| "logits/chosen": -1.3368163108825684, | |
| "logits/rejected": -1.172978401184082, | |
| "logps/chosen": -542.0377807617188, | |
| "logps/rejected": -597.8560180664062, | |
| "loss": 0.5274, | |
| "rewards/accuracies": 0.706250011920929, | |
| "rewards/chosen": -2.6624741554260254, | |
| "rewards/margins": 0.8161913752555847, | |
| "rewards/rejected": -3.478665590286255, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "grad_norm": 8.5625, | |
| "learning_rate": 2.823484120195865e-06, | |
| "logits/chosen": -1.4352657794952393, | |
| "logits/rejected": -1.227199912071228, | |
| "logps/chosen": -520.835693359375, | |
| "logps/rejected": -587.2822265625, | |
| "loss": 0.4585, | |
| "rewards/accuracies": 0.7562500238418579, | |
| "rewards/chosen": -2.4302444458007812, | |
| "rewards/margins": 1.0082801580429077, | |
| "rewards/rejected": -3.4385247230529785, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "grad_norm": 10.3125, | |
| "learning_rate": 2.8008183540801486e-06, | |
| "logits/chosen": -1.293084979057312, | |
| "logits/rejected": -1.148153305053711, | |
| "logps/chosen": -520.2894897460938, | |
| "logps/rejected": -565.23681640625, | |
| "loss": 0.4997, | |
| "rewards/accuracies": 0.737500011920929, | |
| "rewards/chosen": -2.4665493965148926, | |
| "rewards/margins": 0.9034391641616821, | |
| "rewards/rejected": -3.369988203048706, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "grad_norm": 13.1875, | |
| "learning_rate": 2.7781274697184353e-06, | |
| "logits/chosen": -1.1424802541732788, | |
| "logits/rejected": -1.187720775604248, | |
| "logps/chosen": -492.9554138183594, | |
| "logps/rejected": -617.7970581054688, | |
| "loss": 0.5349, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": -2.6359786987304688, | |
| "rewards/margins": 1.0452814102172852, | |
| "rewards/rejected": -3.681259870529175, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "grad_norm": 9.375, | |
| "learning_rate": 2.7554133617930397e-06, | |
| "logits/chosen": -1.2500625848770142, | |
| "logits/rejected": -1.1256784200668335, | |
| "logps/chosen": -501.5577087402344, | |
| "logps/rejected": -588.8922119140625, | |
| "loss": 0.5168, | |
| "rewards/accuracies": 0.71875, | |
| "rewards/chosen": -2.537215232849121, | |
| "rewards/margins": 1.0240715742111206, | |
| "rewards/rejected": -3.5612869262695312, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "grad_norm": 11.625, | |
| "learning_rate": 2.7326779269254363e-06, | |
| "logits/chosen": -1.436962366104126, | |
| "logits/rejected": -1.266498327255249, | |
| "logps/chosen": -528.1736450195312, | |
| "logps/rejected": -578.77734375, | |
| "loss": 0.477, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -2.388388156890869, | |
| "rewards/margins": 1.0654609203338623, | |
| "rewards/rejected": -3.4538490772247314, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "eval_logits/chosen": -1.2391676902770996, | |
| "eval_logits/rejected": -1.1185089349746704, | |
| "eval_logps/chosen": -503.76922607421875, | |
| "eval_logps/rejected": -583.5771484375, | |
| "eval_loss": 0.4988709092140198, | |
| "eval_rewards/accuracies": 0.734499990940094, | |
| "eval_rewards/chosen": -2.39117169380188, | |
| "eval_rewards/margins": 0.9985132813453674, | |
| "eval_rewards/rejected": -3.3896851539611816, | |
| "eval_runtime": 385.1549, | |
| "eval_samples_per_second": 5.193, | |
| "eval_steps_per_second": 0.649, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "grad_norm": 10.375, | |
| "learning_rate": 2.7099230635178954e-06, | |
| "logits/chosen": -1.280256748199463, | |
| "logits/rejected": -1.239262342453003, | |
| "logps/chosen": -499.21240234375, | |
| "logps/rejected": -584.2531127929688, | |
| "loss": 0.5227, | |
| "rewards/accuracies": 0.768750011920929, | |
| "rewards/chosen": -2.3436267375946045, | |
| "rewards/margins": 0.9151015281677246, | |
| "rewards/rejected": -3.25872802734375, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "grad_norm": 10.625, | |
| "learning_rate": 2.6871506715949608e-06, | |
| "logits/chosen": -1.4013721942901611, | |
| "logits/rejected": -1.2793995141983032, | |
| "logps/chosen": -463.5269470214844, | |
| "logps/rejected": -541.9952392578125, | |
| "loss": 0.4813, | |
| "rewards/accuracies": 0.706250011920929, | |
| "rewards/chosen": -2.1055784225463867, | |
| "rewards/margins": 0.965211033821106, | |
| "rewards/rejected": -3.070789337158203, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "grad_norm": 10.25, | |
| "learning_rate": 2.6643626526448063e-06, | |
| "logits/chosen": -1.4540785551071167, | |
| "logits/rejected": -1.2950793504714966, | |
| "logps/chosen": -521.9766235351562, | |
| "logps/rejected": -591.3455810546875, | |
| "loss": 0.4591, | |
| "rewards/accuracies": 0.793749988079071, | |
| "rewards/chosen": -2.2707479000091553, | |
| "rewards/margins": 1.1174595355987549, | |
| "rewards/rejected": -3.388207197189331, | |
| "step": 2030 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "grad_norm": 12.375, | |
| "learning_rate": 2.6415609094604562e-06, | |
| "logits/chosen": -1.2611262798309326, | |
| "logits/rejected": -1.2067164182662964, | |
| "logps/chosen": -539.65869140625, | |
| "logps/rejected": -629.5203857421875, | |
| "loss": 0.4464, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": -2.6909213066101074, | |
| "rewards/margins": 1.0888901948928833, | |
| "rewards/rejected": -3.7798118591308594, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "grad_norm": 14.375, | |
| "learning_rate": 2.618747345980904e-06, | |
| "logits/chosen": -1.2389599084854126, | |
| "logits/rejected": -1.0517133474349976, | |
| "logps/chosen": -593.0328369140625, | |
| "logps/rejected": -635.1866455078125, | |
| "loss": 0.5624, | |
| "rewards/accuracies": 0.6937500238418579, | |
| "rewards/chosen": -3.446354627609253, | |
| "rewards/margins": 0.9264065027236938, | |
| "rewards/rejected": -4.3727617263793945, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "grad_norm": 6.5, | |
| "learning_rate": 2.595923867132136e-06, | |
| "logits/chosen": -1.2825162410736084, | |
| "logits/rejected": -1.1602712869644165, | |
| "logps/chosen": -608.6810302734375, | |
| "logps/rejected": -699.3939819335938, | |
| "loss": 0.5003, | |
| "rewards/accuracies": 0.7749999761581421, | |
| "rewards/chosen": -3.277606964111328, | |
| "rewards/margins": 1.153955101966858, | |
| "rewards/rejected": -4.4315619468688965, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "grad_norm": 8.75, | |
| "learning_rate": 2.5730923786680672e-06, | |
| "logits/chosen": -1.2274243831634521, | |
| "logits/rejected": -1.191007375717163, | |
| "logps/chosen": -544.41259765625, | |
| "logps/rejected": -638.9494018554688, | |
| "loss": 0.5467, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -2.92881441116333, | |
| "rewards/margins": 0.869024932384491, | |
| "rewards/rejected": -3.7978389263153076, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "grad_norm": 7.875, | |
| "learning_rate": 2.5502547870114137e-06, | |
| "logits/chosen": -1.3184901475906372, | |
| "logits/rejected": -1.196045994758606, | |
| "logps/chosen": -512.1152954101562, | |
| "logps/rejected": -571.6860961914062, | |
| "loss": 0.5238, | |
| "rewards/accuracies": 0.7562500238418579, | |
| "rewards/chosen": -2.5673513412475586, | |
| "rewards/margins": 0.8982425928115845, | |
| "rewards/rejected": -3.4655938148498535, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "grad_norm": 10.5625, | |
| "learning_rate": 2.527412999094507e-06, | |
| "logits/chosen": -1.3197405338287354, | |
| "logits/rejected": -1.1518932580947876, | |
| "logps/chosen": -544.2307739257812, | |
| "logps/rejected": -638.2955932617188, | |
| "loss": 0.4778, | |
| "rewards/accuracies": 0.7562500238418579, | |
| "rewards/chosen": -2.50474214553833, | |
| "rewards/margins": 1.0661590099334717, | |
| "rewards/rejected": -3.5709011554718018, | |
| "step": 2090 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "grad_norm": 10.0625, | |
| "learning_rate": 2.504568922200064e-06, | |
| "logits/chosen": -1.283879041671753, | |
| "logits/rejected": -1.1339181661605835, | |
| "logps/chosen": -479.8946838378906, | |
| "logps/rejected": -564.1932373046875, | |
| "loss": 0.5068, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -2.385560989379883, | |
| "rewards/margins": 1.0045907497406006, | |
| "rewards/rejected": -3.3901519775390625, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "eval_logits/chosen": -1.2462238073349, | |
| "eval_logits/rejected": -1.125494360923767, | |
| "eval_logps/chosen": -512.4297485351562, | |
| "eval_logps/rejected": -591.323974609375, | |
| "eval_loss": 0.4939311146736145, | |
| "eval_rewards/accuracies": 0.7429999709129333, | |
| "eval_rewards/chosen": -2.4777767658233643, | |
| "eval_rewards/margins": 0.9893770217895508, | |
| "eval_rewards/rejected": -3.467153787612915, | |
| "eval_runtime": 385.17, | |
| "eval_samples_per_second": 5.193, | |
| "eval_steps_per_second": 0.649, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "grad_norm": 9.6875, | |
| "learning_rate": 2.4817244638019333e-06, | |
| "logits/chosen": -1.3495204448699951, | |
| "logits/rejected": -1.1980758905410767, | |
| "logps/chosen": -514.2600708007812, | |
| "logps/rejected": -565.2801513671875, | |
| "loss": 0.5135, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -2.384481906890869, | |
| "rewards/margins": 0.922328770160675, | |
| "rewards/rejected": -3.3068108558654785, | |
| "step": 2110 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "grad_norm": 14.3125, | |
| "learning_rate": 2.4588815314058155e-06, | |
| "logits/chosen": -1.3099550008773804, | |
| "logits/rejected": -1.2511496543884277, | |
| "logps/chosen": -468.06011962890625, | |
| "logps/rejected": -523.5824584960938, | |
| "loss": 0.4817, | |
| "rewards/accuracies": 0.762499988079071, | |
| "rewards/chosen": -2.224961757659912, | |
| "rewards/margins": 0.8980560302734375, | |
| "rewards/rejected": -3.1230177879333496, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "grad_norm": 9.75, | |
| "learning_rate": 2.4360420323899922e-06, | |
| "logits/chosen": -1.353991985321045, | |
| "logits/rejected": -1.2306454181671143, | |
| "logps/chosen": -505.89434814453125, | |
| "logps/rejected": -550.9930419921875, | |
| "loss": 0.5674, | |
| "rewards/accuracies": 0.7437499761581421, | |
| "rewards/chosen": -2.3008170127868652, | |
| "rewards/margins": 0.7767833471298218, | |
| "rewards/rejected": -3.0776004791259766, | |
| "step": 2130 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "grad_norm": 8.0625, | |
| "learning_rate": 2.4132078738460585e-06, | |
| "logits/chosen": -1.3921695947647095, | |
| "logits/rejected": -1.2415539026260376, | |
| "logps/chosen": -491.42401123046875, | |
| "logps/rejected": -556.8810424804688, | |
| "loss": 0.4726, | |
| "rewards/accuracies": 0.762499988079071, | |
| "rewards/chosen": -2.2695276737213135, | |
| "rewards/margins": 1.0401204824447632, | |
| "rewards/rejected": -3.309648036956787, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "grad_norm": 13.4375, | |
| "learning_rate": 2.3903809624196826e-06, | |
| "logits/chosen": -1.3411505222320557, | |
| "logits/rejected": -1.2057361602783203, | |
| "logps/chosen": -456.32452392578125, | |
| "logps/rejected": -508.445068359375, | |
| "loss": 0.5549, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -2.2262067794799805, | |
| "rewards/margins": 0.8460358381271362, | |
| "rewards/rejected": -3.072242498397827, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "grad_norm": 12.8125, | |
| "learning_rate": 2.3675632041513978e-06, | |
| "logits/chosen": -1.4614931344985962, | |
| "logits/rejected": -1.2260310649871826, | |
| "logps/chosen": -524.8610229492188, | |
| "logps/rejected": -565.4326171875, | |
| "loss": 0.4894, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -2.378574848175049, | |
| "rewards/margins": 1.0525071620941162, | |
| "rewards/rejected": -3.431082248687744, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "grad_norm": 12.1875, | |
| "learning_rate": 2.3447565043174533e-06, | |
| "logits/chosen": -1.3028042316436768, | |
| "logits/rejected": -1.1499183177947998, | |
| "logps/chosen": -515.6001586914062, | |
| "logps/rejected": -565.5277099609375, | |
| "loss": 0.5241, | |
| "rewards/accuracies": 0.71875, | |
| "rewards/chosen": -2.5728909969329834, | |
| "rewards/margins": 0.8863977193832397, | |
| "rewards/rejected": -3.4592888355255127, | |
| "step": 2170 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "grad_norm": 10.3125, | |
| "learning_rate": 2.321962767270724e-06, | |
| "logits/chosen": -1.3512235879898071, | |
| "logits/rejected": -1.2086089849472046, | |
| "logps/chosen": -495.2906188964844, | |
| "logps/rejected": -538.8243408203125, | |
| "loss": 0.5573, | |
| "rewards/accuracies": 0.706250011920929, | |
| "rewards/chosen": -2.4533779621124268, | |
| "rewards/margins": 0.8070129156112671, | |
| "rewards/rejected": -3.2603907585144043, | |
| "step": 2180 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "grad_norm": 10.0, | |
| "learning_rate": 2.299183896281692e-06, | |
| "logits/chosen": -1.301710844039917, | |
| "logits/rejected": -1.1697108745574951, | |
| "logps/chosen": -466.3893127441406, | |
| "logps/rejected": -546.2555541992188, | |
| "loss": 0.524, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -2.1145732402801514, | |
| "rewards/margins": 0.8218411207199097, | |
| "rewards/rejected": -2.9364142417907715, | |
| "step": 2190 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "grad_norm": 7.34375, | |
| "learning_rate": 2.2764217933795297e-06, | |
| "logits/chosen": -1.4019851684570312, | |
| "logits/rejected": -1.2783384323120117, | |
| "logps/chosen": -460.39227294921875, | |
| "logps/rejected": -538.6397705078125, | |
| "loss": 0.4832, | |
| "rewards/accuracies": 0.7437499761581421, | |
| "rewards/chosen": -1.9896026849746704, | |
| "rewards/margins": 0.9877565503120422, | |
| "rewards/rejected": -2.9773590564727783, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "eval_logits/chosen": -1.289854884147644, | |
| "eval_logits/rejected": -1.1670362949371338, | |
| "eval_logps/chosen": -477.1521911621094, | |
| "eval_logps/rejected": -549.7868041992188, | |
| "eval_loss": 0.49245789647102356, | |
| "eval_rewards/accuracies": 0.7425000071525574, | |
| "eval_rewards/chosen": -2.125001907348633, | |
| "eval_rewards/margins": 0.9267801642417908, | |
| "eval_rewards/rejected": -3.05178165435791, | |
| "eval_runtime": 385.1303, | |
| "eval_samples_per_second": 5.193, | |
| "eval_steps_per_second": 0.649, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "grad_norm": 5.1875, | |
| "learning_rate": 2.2536783591932786e-06, | |
| "logits/chosen": -1.4467527866363525, | |
| "logits/rejected": -1.2898051738739014, | |
| "logps/chosen": -501.9493103027344, | |
| "logps/rejected": -568.07080078125, | |
| "loss": 0.5262, | |
| "rewards/accuracies": 0.731249988079071, | |
| "rewards/chosen": -2.26446270942688, | |
| "rewards/margins": 0.842617392539978, | |
| "rewards/rejected": -3.1070799827575684, | |
| "step": 2210 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "grad_norm": 7.84375, | |
| "learning_rate": 2.230955492793149e-06, | |
| "logits/chosen": -1.2303822040557861, | |
| "logits/rejected": -1.1834524869918823, | |
| "logps/chosen": -536.91796875, | |
| "logps/rejected": -603.58203125, | |
| "loss": 0.5935, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -2.6312527656555176, | |
| "rewards/margins": 0.7955335378646851, | |
| "rewards/rejected": -3.4267868995666504, | |
| "step": 2220 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "grad_norm": 5.6875, | |
| "learning_rate": 2.208255091531947e-06, | |
| "logits/chosen": -1.2445331811904907, | |
| "logits/rejected": -1.1615046262741089, | |
| "logps/chosen": -523.9738159179688, | |
| "logps/rejected": -601.7839965820312, | |
| "loss": 0.4818, | |
| "rewards/accuracies": 0.8125, | |
| "rewards/chosen": -2.469764232635498, | |
| "rewards/margins": 1.127774953842163, | |
| "rewards/rejected": -3.597539186477661, | |
| "step": 2230 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "grad_norm": 11.75, | |
| "learning_rate": 2.1855790508866435e-06, | |
| "logits/chosen": -1.3009926080703735, | |
| "logits/rejected": -1.1936320066452026, | |
| "logps/chosen": -551.2839965820312, | |
| "logps/rejected": -635.8419799804688, | |
| "loss": 0.5122, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": -2.5426268577575684, | |
| "rewards/margins": 1.0221750736236572, | |
| "rewards/rejected": -3.5648021697998047, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "grad_norm": 7.0, | |
| "learning_rate": 2.162929264300107e-06, | |
| "logits/chosen": -1.313072919845581, | |
| "logits/rejected": -1.2196762561798096, | |
| "logps/chosen": -495.29840087890625, | |
| "logps/rejected": -598.8929443359375, | |
| "loss": 0.4195, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/chosen": -2.2633450031280518, | |
| "rewards/margins": 1.2595245838165283, | |
| "rewards/rejected": -3.52286958694458, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "grad_norm": 12.0, | |
| "learning_rate": 2.1403076230230006e-06, | |
| "logits/chosen": -1.2646925449371338, | |
| "logits/rejected": -1.1446959972381592, | |
| "logps/chosen": -531.4093017578125, | |
| "logps/rejected": -583.4620971679688, | |
| "loss": 0.587, | |
| "rewards/accuracies": 0.71875, | |
| "rewards/chosen": -2.6022684574127197, | |
| "rewards/margins": 0.7986178994178772, | |
| "rewards/rejected": -3.4008865356445312, | |
| "step": 2260 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "grad_norm": 11.625, | |
| "learning_rate": 2.11771601595586e-06, | |
| "logits/chosen": -1.3460241556167603, | |
| "logits/rejected": -1.232742428779602, | |
| "logps/chosen": -530.1009521484375, | |
| "logps/rejected": -569.1173095703125, | |
| "loss": 0.5295, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": -2.4805283546447754, | |
| "rewards/margins": 0.9129024744033813, | |
| "rewards/rejected": -3.3934311866760254, | |
| "step": 2270 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "grad_norm": 16.625, | |
| "learning_rate": 2.0951563294913737e-06, | |
| "logits/chosen": -1.344582438468933, | |
| "logits/rejected": -1.1410505771636963, | |
| "logps/chosen": -493.6297912597656, | |
| "logps/rejected": -556.1669921875, | |
| "loss": 0.4651, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/chosen": -2.3209660053253174, | |
| "rewards/margins": 0.930306613445282, | |
| "rewards/rejected": -3.251272678375244, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "grad_norm": 7.59375, | |
| "learning_rate": 2.0726304473568693e-06, | |
| "logits/chosen": -1.3250610828399658, | |
| "logits/rejected": -1.207024097442627, | |
| "logps/chosen": -501.9657287597656, | |
| "logps/rejected": -565.3271484375, | |
| "loss": 0.4841, | |
| "rewards/accuracies": 0.7437499761581421, | |
| "rewards/chosen": -2.4302685260772705, | |
| "rewards/margins": 0.9568912386894226, | |
| "rewards/rejected": -3.387159824371338, | |
| "step": 2290 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "grad_norm": 10.25, | |
| "learning_rate": 2.050140250457023e-06, | |
| "logits/chosen": -1.4138681888580322, | |
| "logits/rejected": -1.1992824077606201, | |
| "logps/chosen": -557.7728881835938, | |
| "logps/rejected": -629.088623046875, | |
| "loss": 0.4731, | |
| "rewards/accuracies": 0.7749999761581421, | |
| "rewards/chosen": -2.802109479904175, | |
| "rewards/margins": 1.0537548065185547, | |
| "rewards/rejected": -3.8558642864227295, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "eval_logits/chosen": -1.2155396938323975, | |
| "eval_logits/rejected": -1.095304250717163, | |
| "eval_logps/chosen": -552.5741577148438, | |
| "eval_logps/rejected": -645.44482421875, | |
| "eval_loss": 0.49232217669487, | |
| "eval_rewards/accuracies": 0.7434999942779541, | |
| "eval_rewards/chosen": -2.8792214393615723, | |
| "eval_rewards/margins": 1.129140853881836, | |
| "eval_rewards/rejected": -4.008362770080566, | |
| "eval_runtime": 385.2143, | |
| "eval_samples_per_second": 5.192, | |
| "eval_steps_per_second": 0.649, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "grad_norm": 14.0625, | |
| "learning_rate": 2.0276876167168042e-06, | |
| "logits/chosen": -1.1646645069122314, | |
| "logits/rejected": -1.0743215084075928, | |
| "logps/chosen": -514.7222900390625, | |
| "logps/rejected": -580.4427490234375, | |
| "loss": 0.5834, | |
| "rewards/accuracies": 0.706250011920929, | |
| "rewards/chosen": -2.9022274017333984, | |
| "rewards/margins": 0.9829545021057129, | |
| "rewards/rejected": -3.8851819038391113, | |
| "step": 2310 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "grad_norm": 8.8125, | |
| "learning_rate": 2.0052744209246682e-06, | |
| "logits/chosen": -1.3135536909103394, | |
| "logits/rejected": -1.1998984813690186, | |
| "logps/chosen": -542.7693481445312, | |
| "logps/rejected": -606.01123046875, | |
| "loss": 0.5182, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -2.874311685562134, | |
| "rewards/margins": 0.9611810445785522, | |
| "rewards/rejected": -3.8354930877685547, | |
| "step": 2320 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "grad_norm": 9.625, | |
| "learning_rate": 1.9829025345760127e-06, | |
| "logits/chosen": -1.3124678134918213, | |
| "logits/rejected": -1.2832801342010498, | |
| "logps/chosen": -549.1907958984375, | |
| "logps/rejected": -632.5858764648438, | |
| "loss": 0.5333, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": -2.7018771171569824, | |
| "rewards/margins": 0.8947007060050964, | |
| "rewards/rejected": -3.5965774059295654, | |
| "step": 2330 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "grad_norm": 7.65625, | |
| "learning_rate": 1.9605738257169115e-06, | |
| "logits/chosen": -1.2838572263717651, | |
| "logits/rejected": -1.117290735244751, | |
| "logps/chosen": -497.5326232910156, | |
| "logps/rejected": -604.8740234375, | |
| "loss": 0.4837, | |
| "rewards/accuracies": 0.7562500238418579, | |
| "rewards/chosen": -2.6455423831939697, | |
| "rewards/margins": 1.1532337665557861, | |
| "rewards/rejected": -3.798776149749756, | |
| "step": 2340 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "grad_norm": 9.9375, | |
| "learning_rate": 1.9382901587881275e-06, | |
| "logits/chosen": -1.3377434015274048, | |
| "logits/rejected": -1.2184029817581177, | |
| "logps/chosen": -514.0582275390625, | |
| "logps/rejected": -602.654052734375, | |
| "loss": 0.4292, | |
| "rewards/accuracies": 0.7875000238418579, | |
| "rewards/chosen": -2.5530195236206055, | |
| "rewards/margins": 1.208957552909851, | |
| "rewards/rejected": -3.761976957321167, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "grad_norm": 10.5625, | |
| "learning_rate": 1.916053394469437e-06, | |
| "logits/chosen": -1.3620846271514893, | |
| "logits/rejected": -1.1589324474334717, | |
| "logps/chosen": -535.8505859375, | |
| "logps/rejected": -625.4491577148438, | |
| "loss": 0.5293, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -2.725583791732788, | |
| "rewards/margins": 1.0414365530014038, | |
| "rewards/rejected": -3.7670199871063232, | |
| "step": 2360 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "grad_norm": 11.3125, | |
| "learning_rate": 1.8938653895242604e-06, | |
| "logits/chosen": -1.3228267431259155, | |
| "logits/rejected": -1.1428587436676025, | |
| "logps/chosen": -536.3853759765625, | |
| "logps/rejected": -627.5452880859375, | |
| "loss": 0.441, | |
| "rewards/accuracies": 0.8125, | |
| "rewards/chosen": -2.720797300338745, | |
| "rewards/margins": 1.1999356746673584, | |
| "rewards/rejected": -3.9207332134246826, | |
| "step": 2370 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "grad_norm": 10.125, | |
| "learning_rate": 1.8717279966446267e-06, | |
| "logits/chosen": -1.1800651550292969, | |
| "logits/rejected": -1.102126955986023, | |
| "logps/chosen": -539.4421997070312, | |
| "logps/rejected": -641.0511474609375, | |
| "loss": 0.4566, | |
| "rewards/accuracies": 0.7875000238418579, | |
| "rewards/chosen": -2.90867018699646, | |
| "rewards/margins": 1.1115381717681885, | |
| "rewards/rejected": -4.020208358764648, | |
| "step": 2380 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "grad_norm": 10.125, | |
| "learning_rate": 1.8496430642964698e-06, | |
| "logits/chosen": -1.258175015449524, | |
| "logits/rejected": -1.1534559726715088, | |
| "logps/chosen": -557.5374755859375, | |
| "logps/rejected": -637.3475341796875, | |
| "loss": 0.51, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -2.8512537479400635, | |
| "rewards/margins": 1.0346183776855469, | |
| "rewards/rejected": -3.8858723640441895, | |
| "step": 2390 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "grad_norm": 8.75, | |
| "learning_rate": 1.827612436565286e-06, | |
| "logits/chosen": -1.2754342555999756, | |
| "logits/rejected": -1.123130440711975, | |
| "logps/chosen": -543.8443603515625, | |
| "logps/rejected": -633.3651123046875, | |
| "loss": 0.4782, | |
| "rewards/accuracies": 0.762499988079071, | |
| "rewards/chosen": -2.808797836303711, | |
| "rewards/margins": 1.1008532047271729, | |
| "rewards/rejected": -3.909651279449463, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "eval_logits/chosen": -1.1977647542953491, | |
| "eval_logits/rejected": -1.0794349908828735, | |
| "eval_logps/chosen": -549.680419921875, | |
| "eval_logps/rejected": -637.0914306640625, | |
| "eval_loss": 0.4923146665096283, | |
| "eval_rewards/accuracies": 0.7419999837875366, | |
| "eval_rewards/chosen": -2.8502840995788574, | |
| "eval_rewards/margins": 1.0745435953140259, | |
| "eval_rewards/rejected": -3.9248275756835938, | |
| "eval_runtime": 385.0636, | |
| "eval_samples_per_second": 5.194, | |
| "eval_steps_per_second": 0.649, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "grad_norm": 13.875, | |
| "learning_rate": 1.8056379530021492e-06, | |
| "logits/chosen": -1.3143008947372437, | |
| "logits/rejected": -1.2356500625610352, | |
| "logps/chosen": -539.9703979492188, | |
| "logps/rejected": -599.3643188476562, | |
| "loss": 0.5312, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -2.9577202796936035, | |
| "rewards/margins": 0.8420518040657043, | |
| "rewards/rejected": -3.799771785736084, | |
| "step": 2410 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "grad_norm": 11.0625, | |
| "learning_rate": 1.7837214484701154e-06, | |
| "logits/chosen": -1.3325443267822266, | |
| "logits/rejected": -1.2115572690963745, | |
| "logps/chosen": -515.3961181640625, | |
| "logps/rejected": -601.1583862304688, | |
| "loss": 0.4782, | |
| "rewards/accuracies": 0.8062499761581421, | |
| "rewards/chosen": -2.630460262298584, | |
| "rewards/margins": 1.091335415840149, | |
| "rewards/rejected": -3.7217955589294434, | |
| "step": 2420 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "grad_norm": 13.9375, | |
| "learning_rate": 1.7618647529910043e-06, | |
| "logits/chosen": -1.3422627449035645, | |
| "logits/rejected": -1.2155346870422363, | |
| "logps/chosen": -517.1422119140625, | |
| "logps/rejected": -613.8555908203125, | |
| "loss": 0.5001, | |
| "rewards/accuracies": 0.762499988079071, | |
| "rewards/chosen": -2.6036880016326904, | |
| "rewards/margins": 1.0863001346588135, | |
| "rewards/rejected": -3.689988613128662, | |
| "step": 2430 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "grad_norm": 9.25, | |
| "learning_rate": 1.7400696915925996e-06, | |
| "logits/chosen": -1.3564714193344116, | |
| "logits/rejected": -1.1683833599090576, | |
| "logps/chosen": -539.3397216796875, | |
| "logps/rejected": -584.2203979492188, | |
| "loss": 0.5162, | |
| "rewards/accuracies": 0.731249988079071, | |
| "rewards/chosen": -2.687293529510498, | |
| "rewards/margins": 1.0315442085266113, | |
| "rewards/rejected": -3.7188377380371094, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "grad_norm": 11.125, | |
| "learning_rate": 1.718338084156254e-06, | |
| "logits/chosen": -1.3139379024505615, | |
| "logits/rejected": -1.1639807224273682, | |
| "logps/chosen": -541.3829956054688, | |
| "logps/rejected": -613.3155517578125, | |
| "loss": 0.4505, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": -2.545919179916382, | |
| "rewards/margins": 1.1031758785247803, | |
| "rewards/rejected": -3.649095058441162, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "grad_norm": 10.8125, | |
| "learning_rate": 1.6966717452649372e-06, | |
| "logits/chosen": -1.4163377285003662, | |
| "logits/rejected": -1.2610633373260498, | |
| "logps/chosen": -529.8837890625, | |
| "logps/rejected": -588.6536254882812, | |
| "loss": 0.4533, | |
| "rewards/accuracies": 0.7437499761581421, | |
| "rewards/chosen": -2.5294137001037598, | |
| "rewards/margins": 1.1063209772109985, | |
| "rewards/rejected": -3.6357345581054688, | |
| "step": 2460 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "grad_norm": 7.78125, | |
| "learning_rate": 1.6750724840517103e-06, | |
| "logits/chosen": -1.3619472980499268, | |
| "logits/rejected": -1.2863503694534302, | |
| "logps/chosen": -506.430908203125, | |
| "logps/rejected": -603.09228515625, | |
| "loss": 0.5196, | |
| "rewards/accuracies": 0.706250011920929, | |
| "rewards/chosen": -2.5362462997436523, | |
| "rewards/margins": 0.925518810749054, | |
| "rewards/rejected": -3.4617652893066406, | |
| "step": 2470 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "grad_norm": 14.375, | |
| "learning_rate": 1.6535421040486686e-06, | |
| "logits/chosen": -1.175429105758667, | |
| "logits/rejected": -1.0819575786590576, | |
| "logps/chosen": -522.57373046875, | |
| "logps/rejected": -610.5762939453125, | |
| "loss": 0.4362, | |
| "rewards/accuracies": 0.8187500238418579, | |
| "rewards/chosen": -2.69221830368042, | |
| "rewards/margins": 1.2225408554077148, | |
| "rewards/rejected": -3.914759874343872, | |
| "step": 2480 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "grad_norm": 12.25, | |
| "learning_rate": 1.6320824030363458e-06, | |
| "logits/chosen": -1.2581863403320312, | |
| "logits/rejected": -1.1994664669036865, | |
| "logps/chosen": -505.96783447265625, | |
| "logps/rejected": -609.1953735351562, | |
| "loss": 0.4515, | |
| "rewards/accuracies": 0.7437499761581421, | |
| "rewards/chosen": -2.7113680839538574, | |
| "rewards/margins": 1.186835527420044, | |
| "rewards/rejected": -3.8982033729553223, | |
| "step": 2490 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "grad_norm": 13.75, | |
| "learning_rate": 1.6106951728936028e-06, | |
| "logits/chosen": -1.3734843730926514, | |
| "logits/rejected": -1.2433956861495972, | |
| "logps/chosen": -518.4763793945312, | |
| "logps/rejected": -614.0827026367188, | |
| "loss": 0.4983, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -2.6127266883850098, | |
| "rewards/margins": 1.034812569618225, | |
| "rewards/rejected": -3.6475391387939453, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "eval_logits/chosen": -1.2522040605545044, | |
| "eval_logits/rejected": -1.1292414665222168, | |
| "eval_logps/chosen": -521.7777709960938, | |
| "eval_logps/rejected": -610.1890258789062, | |
| "eval_loss": 0.49058130383491516, | |
| "eval_rewards/accuracies": 0.7409999966621399, | |
| "eval_rewards/chosen": -2.5712568759918213, | |
| "eval_rewards/margins": 1.0845470428466797, | |
| "eval_rewards/rejected": -3.655803918838501, | |
| "eval_runtime": 384.7732, | |
| "eval_samples_per_second": 5.198, | |
| "eval_steps_per_second": 0.65, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "grad_norm": 8.4375, | |
| "learning_rate": 1.5893821994479996e-06, | |
| "logits/chosen": -1.372878909111023, | |
| "logits/rejected": -1.2597870826721191, | |
| "logps/chosen": -519.8887939453125, | |
| "logps/rejected": -593.8539428710938, | |
| "loss": 0.476, | |
| "rewards/accuracies": 0.768750011920929, | |
| "rewards/chosen": -2.424806594848633, | |
| "rewards/margins": 1.1252799034118652, | |
| "rewards/rejected": -3.550086259841919, | |
| "step": 2510 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "grad_norm": 9.0625, | |
| "learning_rate": 1.5681452623266868e-06, | |
| "logits/chosen": -1.347572684288025, | |
| "logits/rejected": -1.115192174911499, | |
| "logps/chosen": -546.6536254882812, | |
| "logps/rejected": -608.205078125, | |
| "loss": 0.478, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": -2.5760815143585205, | |
| "rewards/margins": 1.198232889175415, | |
| "rewards/rejected": -3.7743141651153564, | |
| "step": 2520 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "grad_norm": 7.9375, | |
| "learning_rate": 1.5469861348078014e-06, | |
| "logits/chosen": -1.3562158346176147, | |
| "logits/rejected": -1.2117723226547241, | |
| "logps/chosen": -505.29254150390625, | |
| "logps/rejected": -614.58251953125, | |
| "loss": 0.4407, | |
| "rewards/accuracies": 0.768750011920929, | |
| "rewards/chosen": -2.5852229595184326, | |
| "rewards/margins": 1.1992766857147217, | |
| "rewards/rejected": -3.784499406814575, | |
| "step": 2530 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "grad_norm": 10.3125, | |
| "learning_rate": 1.5259065836724035e-06, | |
| "logits/chosen": -1.2109121084213257, | |
| "logits/rejected": -1.152276635169983, | |
| "logps/chosen": -509.5875549316406, | |
| "logps/rejected": -634.964111328125, | |
| "loss": 0.4268, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": -2.6477839946746826, | |
| "rewards/margins": 1.3151264190673828, | |
| "rewards/rejected": -3.9629104137420654, | |
| "step": 2540 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "grad_norm": 19.375, | |
| "learning_rate": 1.5049083690569456e-06, | |
| "logits/chosen": -1.2700594663619995, | |
| "logits/rejected": -1.166520118713379, | |
| "logps/chosen": -509.182861328125, | |
| "logps/rejected": -621.1192626953125, | |
| "loss": 0.5163, | |
| "rewards/accuracies": 0.7437499761581421, | |
| "rewards/chosen": -2.71712589263916, | |
| "rewards/margins": 1.1634531021118164, | |
| "rewards/rejected": -3.8805785179138184, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "grad_norm": 16.5, | |
| "learning_rate": 1.4839932443063057e-06, | |
| "logits/chosen": -1.275468349456787, | |
| "logits/rejected": -1.1098088026046753, | |
| "logps/chosen": -555.3331909179688, | |
| "logps/rejected": -615.7780151367188, | |
| "loss": 0.4743, | |
| "rewards/accuracies": 0.7749999761581421, | |
| "rewards/chosen": -2.6335442066192627, | |
| "rewards/margins": 1.167301058769226, | |
| "rewards/rejected": -3.8008453845977783, | |
| "step": 2560 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "grad_norm": 18.0, | |
| "learning_rate": 1.4631629558273803e-06, | |
| "logits/chosen": -1.2889525890350342, | |
| "logits/rejected": -1.1872795820236206, | |
| "logps/chosen": -510.55615234375, | |
| "logps/rejected": -586.6162109375, | |
| "loss": 0.6102, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -2.688310146331787, | |
| "rewards/margins": 0.8770621418952942, | |
| "rewards/rejected": -3.5653719902038574, | |
| "step": 2570 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "grad_norm": 6.71875, | |
| "learning_rate": 1.4424192429432657e-06, | |
| "logits/chosen": -1.359438419342041, | |
| "logits/rejected": -1.2795076370239258, | |
| "logps/chosen": -480.8011779785156, | |
| "logps/rejected": -599.15966796875, | |
| "loss": 0.4647, | |
| "rewards/accuracies": 0.737500011920929, | |
| "rewards/chosen": -2.249246120452881, | |
| "rewards/margins": 1.150412917137146, | |
| "rewards/rejected": -3.3996593952178955, | |
| "step": 2580 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "grad_norm": 13.1875, | |
| "learning_rate": 1.421763837748016e-06, | |
| "logits/chosen": -1.326791763305664, | |
| "logits/rejected": -1.2331459522247314, | |
| "logps/chosen": -485.2764587402344, | |
| "logps/rejected": -594.4434814453125, | |
| "loss": 0.4524, | |
| "rewards/accuracies": 0.7749999761581421, | |
| "rewards/chosen": -2.346505641937256, | |
| "rewards/margins": 1.1767139434814453, | |
| "rewards/rejected": -3.523219585418701, | |
| "step": 2590 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "grad_norm": 10.8125, | |
| "learning_rate": 1.401198464962021e-06, | |
| "logits/chosen": -1.3617570400238037, | |
| "logits/rejected": -1.1875524520874023, | |
| "logps/chosen": -524.5842895507812, | |
| "logps/rejected": -588.7896728515625, | |
| "loss": 0.4746, | |
| "rewards/accuracies": 0.762499988079071, | |
| "rewards/chosen": -2.5600085258483887, | |
| "rewards/margins": 1.0283123254776, | |
| "rewards/rejected": -3.5883209705352783, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "eval_logits/chosen": -1.2491270303726196, | |
| "eval_logits/rejected": -1.1266547441482544, | |
| "eval_logps/chosen": -523.2234497070312, | |
| "eval_logps/rejected": -616.9339599609375, | |
| "eval_loss": 0.4946673512458801, | |
| "eval_rewards/accuracies": 0.7365000247955322, | |
| "eval_rewards/chosen": -2.585714340209961, | |
| "eval_rewards/margins": 1.1375384330749512, | |
| "eval_rewards/rejected": -3.723253011703491, | |
| "eval_runtime": 385.1919, | |
| "eval_samples_per_second": 5.192, | |
| "eval_steps_per_second": 0.649, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "grad_norm": 10.1875, | |
| "learning_rate": 1.3807248417879896e-06, | |
| "logits/chosen": -1.3990509510040283, | |
| "logits/rejected": -1.2910901308059692, | |
| "logps/chosen": -524.749267578125, | |
| "logps/rejected": -631.2271728515625, | |
| "loss": 0.445, | |
| "rewards/accuracies": 0.7875000238418579, | |
| "rewards/chosen": -2.5168325901031494, | |
| "rewards/margins": 1.2660022974014282, | |
| "rewards/rejected": -3.782834529876709, | |
| "step": 2610 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "grad_norm": 25.875, | |
| "learning_rate": 1.3603446777675665e-06, | |
| "logits/chosen": -1.2434417009353638, | |
| "logits/rejected": -1.1283738613128662, | |
| "logps/chosen": -539.6519165039062, | |
| "logps/rejected": -630.5535888671875, | |
| "loss": 0.5282, | |
| "rewards/accuracies": 0.737500011920929, | |
| "rewards/chosen": -2.7645440101623535, | |
| "rewards/margins": 1.129831314086914, | |
| "rewards/rejected": -3.8943753242492676, | |
| "step": 2620 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "grad_norm": 7.5625, | |
| "learning_rate": 1.3400596746385817e-06, | |
| "logits/chosen": -1.3770835399627686, | |
| "logits/rejected": -1.216672658920288, | |
| "logps/chosen": -541.1361694335938, | |
| "logps/rejected": -622.9951171875, | |
| "loss": 0.5016, | |
| "rewards/accuracies": 0.762499988079071, | |
| "rewards/chosen": -2.7195262908935547, | |
| "rewards/margins": 1.0894376039505005, | |
| "rewards/rejected": -3.8089637756347656, | |
| "step": 2630 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "grad_norm": 9.3125, | |
| "learning_rate": 1.3198715261929587e-06, | |
| "logits/chosen": -1.344639539718628, | |
| "logits/rejected": -1.1973941326141357, | |
| "logps/chosen": -521.10888671875, | |
| "logps/rejected": -628.2103881835938, | |
| "loss": 0.4222, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": -2.8465256690979004, | |
| "rewards/margins": 1.2073593139648438, | |
| "rewards/rejected": -4.053884983062744, | |
| "step": 2640 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "grad_norm": 7.96875, | |
| "learning_rate": 1.2997819181352823e-06, | |
| "logits/chosen": -1.3569964170455933, | |
| "logits/rejected": -1.2025775909423828, | |
| "logps/chosen": -566.9078369140625, | |
| "logps/rejected": -691.9054565429688, | |
| "loss": 0.4043, | |
| "rewards/accuracies": 0.7875000238418579, | |
| "rewards/chosen": -2.7007460594177246, | |
| "rewards/margins": 1.4950422048568726, | |
| "rewards/rejected": -4.195788383483887, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "grad_norm": 16.375, | |
| "learning_rate": 1.2797925279420454e-06, | |
| "logits/chosen": -1.3312625885009766, | |
| "logits/rejected": -1.1907614469528198, | |
| "logps/chosen": -577.4212646484375, | |
| "logps/rejected": -690.229248046875, | |
| "loss": 0.4911, | |
| "rewards/accuracies": 0.7749999761581421, | |
| "rewards/chosen": -3.055377244949341, | |
| "rewards/margins": 1.2482696771621704, | |
| "rewards/rejected": -4.303646564483643, | |
| "step": 2660 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "grad_norm": 16.875, | |
| "learning_rate": 1.2599050247215764e-06, | |
| "logits/chosen": -1.2753608226776123, | |
| "logits/rejected": -1.1736326217651367, | |
| "logps/chosen": -555.798828125, | |
| "logps/rejected": -654.16357421875, | |
| "loss": 0.4766, | |
| "rewards/accuracies": 0.7875000238418579, | |
| "rewards/chosen": -2.978670835494995, | |
| "rewards/margins": 1.214051365852356, | |
| "rewards/rejected": -4.192722320556641, | |
| "step": 2670 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "grad_norm": 12.0, | |
| "learning_rate": 1.2401210690746705e-06, | |
| "logits/chosen": -1.3060388565063477, | |
| "logits/rejected": -1.1588232517242432, | |
| "logps/chosen": -556.8359985351562, | |
| "logps/rejected": -636.2288818359375, | |
| "loss": 0.5018, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -2.8967931270599365, | |
| "rewards/margins": 1.125984787940979, | |
| "rewards/rejected": -4.022777557373047, | |
| "step": 2680 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "grad_norm": 10.625, | |
| "learning_rate": 1.2204423129559306e-06, | |
| "logits/chosen": -1.3615459203720093, | |
| "logits/rejected": -1.3014076948165894, | |
| "logps/chosen": -538.6215209960938, | |
| "logps/rejected": -644.1961669921875, | |
| "loss": 0.5168, | |
| "rewards/accuracies": 0.7562500238418579, | |
| "rewards/chosen": -2.731085777282715, | |
| "rewards/margins": 1.0968948602676392, | |
| "rewards/rejected": -3.8279807567596436, | |
| "step": 2690 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "grad_norm": 15.1875, | |
| "learning_rate": 1.20087039953583e-06, | |
| "logits/chosen": -1.375808596611023, | |
| "logits/rejected": -1.252746820449829, | |
| "logps/chosen": -531.059326171875, | |
| "logps/rejected": -624.744140625, | |
| "loss": 0.514, | |
| "rewards/accuracies": 0.7749999761581421, | |
| "rewards/chosen": -2.668408155441284, | |
| "rewards/margins": 1.2005492448806763, | |
| "rewards/rejected": -3.86895751953125, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "eval_logits/chosen": -1.2462804317474365, | |
| "eval_logits/rejected": -1.1248236894607544, | |
| "eval_logps/chosen": -534.3994140625, | |
| "eval_logps/rejected": -625.0958251953125, | |
| "eval_loss": 0.4923916161060333, | |
| "eval_rewards/accuracies": 0.7354999780654907, | |
| "eval_rewards/chosen": -2.6974740028381348, | |
| "eval_rewards/margins": 1.1073981523513794, | |
| "eval_rewards/rejected": -3.8048720359802246, | |
| "eval_runtime": 385.0439, | |
| "eval_samples_per_second": 5.194, | |
| "eval_steps_per_second": 0.649, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "grad_norm": 13.125, | |
| "learning_rate": 1.181406963063507e-06, | |
| "logits/chosen": -1.2778210639953613, | |
| "logits/rejected": -1.228360652923584, | |
| "logps/chosen": -523.0855102539062, | |
| "logps/rejected": -629.9219970703125, | |
| "loss": 0.5097, | |
| "rewards/accuracies": 0.7437499761581421, | |
| "rewards/chosen": -2.572385787963867, | |
| "rewards/margins": 1.0744374990463257, | |
| "rewards/rejected": -3.6468231678009033, | |
| "step": 2710 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "grad_norm": 6.6875, | |
| "learning_rate": 1.1620536287303052e-06, | |
| "logits/chosen": -1.3865063190460205, | |
| "logits/rejected": -1.2557927370071411, | |
| "logps/chosen": -545.7744750976562, | |
| "logps/rejected": -609.2724609375, | |
| "loss": 0.5395, | |
| "rewards/accuracies": 0.731249988079071, | |
| "rewards/chosen": -2.5462071895599365, | |
| "rewards/margins": 0.9365339279174805, | |
| "rewards/rejected": -3.482741117477417, | |
| "step": 2720 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "grad_norm": 9.3125, | |
| "learning_rate": 1.1428120125340717e-06, | |
| "logits/chosen": -1.3251538276672363, | |
| "logits/rejected": -1.1808980703353882, | |
| "logps/chosen": -494.53924560546875, | |
| "logps/rejected": -603.8756103515625, | |
| "loss": 0.3923, | |
| "rewards/accuracies": 0.824999988079071, | |
| "rewards/chosen": -2.404003143310547, | |
| "rewards/margins": 1.5079169273376465, | |
| "rewards/rejected": -3.9119198322296143, | |
| "step": 2730 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "grad_norm": 10.125, | |
| "learning_rate": 1.123683721144223e-06, | |
| "logits/chosen": -1.319456696510315, | |
| "logits/rejected": -1.213781714439392, | |
| "logps/chosen": -539.8772583007812, | |
| "logps/rejected": -638.1966552734375, | |
| "loss": 0.44, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -2.5620384216308594, | |
| "rewards/margins": 1.3065942525863647, | |
| "rewards/rejected": -3.8686325550079346, | |
| "step": 2740 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "grad_norm": 6.25, | |
| "learning_rate": 1.1046703517675848e-06, | |
| "logits/chosen": -1.3422720432281494, | |
| "logits/rejected": -1.2605860233306885, | |
| "logps/chosen": -512.2991943359375, | |
| "logps/rejected": -620.3077392578125, | |
| "loss": 0.522, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": -2.5718300342559814, | |
| "rewards/margins": 1.018854022026062, | |
| "rewards/rejected": -3.590684175491333, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "grad_norm": 10.75, | |
| "learning_rate": 1.085773492015028e-06, | |
| "logits/chosen": -1.3229783773422241, | |
| "logits/rejected": -1.1519359350204468, | |
| "logps/chosen": -497.25701904296875, | |
| "logps/rejected": -590.8815307617188, | |
| "loss": 0.4271, | |
| "rewards/accuracies": 0.7875000238418579, | |
| "rewards/chosen": -2.508775472640991, | |
| "rewards/margins": 1.2793452739715576, | |
| "rewards/rejected": -3.788120985031128, | |
| "step": 2760 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "grad_norm": 32.0, | |
| "learning_rate": 1.0669947197689034e-06, | |
| "logits/chosen": -1.2877874374389648, | |
| "logits/rejected": -1.1616923809051514, | |
| "logps/chosen": -543.9298095703125, | |
| "logps/rejected": -625.6560668945312, | |
| "loss": 0.487, | |
| "rewards/accuracies": 0.7562500238418579, | |
| "rewards/chosen": -2.712940216064453, | |
| "rewards/margins": 1.1042835712432861, | |
| "rewards/rejected": -3.8172237873077393, | |
| "step": 2770 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "grad_norm": 9.4375, | |
| "learning_rate": 1.048335603051291e-06, | |
| "logits/chosen": -1.282389521598816, | |
| "logits/rejected": -1.1512477397918701, | |
| "logps/chosen": -572.5489501953125, | |
| "logps/rejected": -676.9873046875, | |
| "loss": 0.4351, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": -2.8474509716033936, | |
| "rewards/margins": 1.3263962268829346, | |
| "rewards/rejected": -4.173847198486328, | |
| "step": 2780 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "grad_norm": 9.0, | |
| "learning_rate": 1.0297976998930665e-06, | |
| "logits/chosen": -1.2781507968902588, | |
| "logits/rejected": -1.1678388118743896, | |
| "logps/chosen": -534.2879638671875, | |
| "logps/rejected": -643.2774047851562, | |
| "loss": 0.4393, | |
| "rewards/accuracies": 0.762499988079071, | |
| "rewards/chosen": -2.7367353439331055, | |
| "rewards/margins": 1.3677116632461548, | |
| "rewards/rejected": -4.104446887969971, | |
| "step": 2790 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "grad_norm": 8.4375, | |
| "learning_rate": 1.0113825582038078e-06, | |
| "logits/chosen": -1.3029879331588745, | |
| "logits/rejected": -1.196803092956543, | |
| "logps/chosen": -556.0444946289062, | |
| "logps/rejected": -652.0103149414062, | |
| "loss": 0.4662, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": -2.9077467918395996, | |
| "rewards/margins": 1.127124547958374, | |
| "rewards/rejected": -4.0348711013793945, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "eval_logits/chosen": -1.2345499992370605, | |
| "eval_logits/rejected": -1.1134350299835205, | |
| "eval_logps/chosen": -547.6557006835938, | |
| "eval_logps/rejected": -641.2913208007812, | |
| "eval_loss": 0.4899207055568695, | |
| "eval_rewards/accuracies": 0.7379999756813049, | |
| "eval_rewards/chosen": -2.830036163330078, | |
| "eval_rewards/margins": 1.1367909908294678, | |
| "eval_rewards/rejected": -3.966827154159546, | |
| "eval_runtime": 384.9651, | |
| "eval_samples_per_second": 5.195, | |
| "eval_steps_per_second": 0.649, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "grad_norm": 9.5625, | |
| "learning_rate": 9.930917156425477e-07, | |
| "logits/chosen": -1.2949634790420532, | |
| "logits/rejected": -1.183593988418579, | |
| "logps/chosen": -563.5440673828125, | |
| "logps/rejected": -668.3243408203125, | |
| "loss": 0.5295, | |
| "rewards/accuracies": 0.762499988079071, | |
| "rewards/chosen": -3.0242040157318115, | |
| "rewards/margins": 1.0766557455062866, | |
| "rewards/rejected": -4.100859642028809, | |
| "step": 2810 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "grad_norm": 19.0, | |
| "learning_rate": 9.749266994893756e-07, | |
| "logits/chosen": -1.2192089557647705, | |
| "logits/rejected": -1.0985405445098877, | |
| "logps/chosen": -531.9083251953125, | |
| "logps/rejected": -606.6322021484375, | |
| "loss": 0.5603, | |
| "rewards/accuracies": 0.737500011920929, | |
| "rewards/chosen": -2.9121012687683105, | |
| "rewards/margins": 0.8815471529960632, | |
| "rewards/rejected": -3.7936484813690186, | |
| "step": 2820 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "grad_norm": 12.125, | |
| "learning_rate": 9.56889026517913e-07, | |
| "logits/chosen": -1.2642897367477417, | |
| "logits/rejected": -1.1569067239761353, | |
| "logps/chosen": -561.394287109375, | |
| "logps/rejected": -641.754638671875, | |
| "loss": 0.5072, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -3.0005598068237305, | |
| "rewards/margins": 1.063594102859497, | |
| "rewards/rejected": -4.064153671264648, | |
| "step": 2830 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "grad_norm": 7.40625, | |
| "learning_rate": 9.389802028686617e-07, | |
| "logits/chosen": -1.3579823970794678, | |
| "logits/rejected": -1.2555077075958252, | |
| "logps/chosen": -551.67626953125, | |
| "logps/rejected": -596.185546875, | |
| "loss": 0.5982, | |
| "rewards/accuracies": 0.706250011920929, | |
| "rewards/chosen": -2.90908145904541, | |
| "rewards/margins": 0.7748203277587891, | |
| "rewards/rejected": -3.6839020252227783, | |
| "step": 2840 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "grad_norm": 9.75, | |
| "learning_rate": 9.212017239232427e-07, | |
| "logits/chosen": -1.2956401109695435, | |
| "logits/rejected": -1.1352595090866089, | |
| "logps/chosen": -550.6188354492188, | |
| "logps/rejected": -647.8556518554688, | |
| "loss": 0.4704, | |
| "rewards/accuracies": 0.7749999761581421, | |
| "rewards/chosen": -2.7766430377960205, | |
| "rewards/margins": 1.2017085552215576, | |
| "rewards/rejected": -3.97835111618042, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "grad_norm": 9.875, | |
| "learning_rate": 9.03555074179533e-07, | |
| "logits/chosen": -1.2600593566894531, | |
| "logits/rejected": -1.2393784523010254, | |
| "logps/chosen": -524.3843994140625, | |
| "logps/rejected": -654.7698364257812, | |
| "loss": 0.4337, | |
| "rewards/accuracies": 0.8062499761581421, | |
| "rewards/chosen": -2.6760458946228027, | |
| "rewards/margins": 1.2455599308013916, | |
| "rewards/rejected": -3.921605348587036, | |
| "step": 2860 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "grad_norm": 20.5, | |
| "learning_rate": 8.860417271277067e-07, | |
| "logits/chosen": -1.3854873180389404, | |
| "logits/rejected": -1.3558924198150635, | |
| "logps/chosen": -545.82568359375, | |
| "logps/rejected": -628.0182495117188, | |
| "loss": 0.4992, | |
| "rewards/accuracies": 0.7562500238418579, | |
| "rewards/chosen": -2.765906810760498, | |
| "rewards/margins": 0.901807963848114, | |
| "rewards/rejected": -3.6677145957946777, | |
| "step": 2870 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "grad_norm": 10.125, | |
| "learning_rate": 8.686631451272029e-07, | |
| "logits/chosen": -1.3561471700668335, | |
| "logits/rejected": -1.2010211944580078, | |
| "logps/chosen": -551.3495483398438, | |
| "logps/rejected": -639.118896484375, | |
| "loss": 0.5022, | |
| "rewards/accuracies": 0.737500011920929, | |
| "rewards/chosen": -2.979217052459717, | |
| "rewards/margins": 1.1320674419403076, | |
| "rewards/rejected": -4.1112847328186035, | |
| "step": 2880 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "grad_norm": 9.3125, | |
| "learning_rate": 8.514207792846168e-07, | |
| "logits/chosen": -1.3641732931137085, | |
| "logits/rejected": -1.2438944578170776, | |
| "logps/chosen": -541.0029296875, | |
| "logps/rejected": -626.8678588867188, | |
| "loss": 0.487, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -2.901430130004883, | |
| "rewards/margins": 1.142988681793213, | |
| "rewards/rejected": -4.044418811798096, | |
| "step": 2890 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "grad_norm": 8.5, | |
| "learning_rate": 8.343160693325356e-07, | |
| "logits/chosen": -1.2573918104171753, | |
| "logits/rejected": -1.1431939601898193, | |
| "logps/chosen": -554.5100708007812, | |
| "logps/rejected": -662.68212890625, | |
| "loss": 0.5111, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": -2.98957896232605, | |
| "rewards/margins": 1.1248613595962524, | |
| "rewards/rejected": -4.11444091796875, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "eval_logits/chosen": -1.2396172285079956, | |
| "eval_logits/rejected": -1.1188315153121948, | |
| "eval_logps/chosen": -558.570556640625, | |
| "eval_logps/rejected": -650.9627075195312, | |
| "eval_loss": 0.48732802271842957, | |
| "eval_rewards/accuracies": 0.7404999732971191, | |
| "eval_rewards/chosen": -2.9391860961914062, | |
| "eval_rewards/margins": 1.1243551969528198, | |
| "eval_rewards/rejected": -4.063540935516357, | |
| "eval_runtime": 385.3295, | |
| "eval_samples_per_second": 5.19, | |
| "eval_steps_per_second": 0.649, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "grad_norm": 7.8125, | |
| "learning_rate": 8.173504435093174e-07, | |
| "logits/chosen": -1.252179741859436, | |
| "logits/rejected": -1.0778075456619263, | |
| "logps/chosen": -531.073974609375, | |
| "logps/rejected": -619.1007690429688, | |
| "loss": 0.4851, | |
| "rewards/accuracies": 0.737500011920929, | |
| "rewards/chosen": -2.895054817199707, | |
| "rewards/margins": 1.2014925479888916, | |
| "rewards/rejected": -4.096547603607178, | |
| "step": 2910 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "grad_norm": 6.84375, | |
| "learning_rate": 8.00525318439836e-07, | |
| "logits/chosen": -1.2942620515823364, | |
| "logits/rejected": -1.1525405645370483, | |
| "logps/chosen": -569.043701171875, | |
| "logps/rejected": -657.7420043945312, | |
| "loss": 0.5304, | |
| "rewards/accuracies": 0.7437499761581421, | |
| "rewards/chosen": -2.9221444129943848, | |
| "rewards/margins": 0.9633318185806274, | |
| "rewards/rejected": -3.8854763507843018, | |
| "step": 2920 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "grad_norm": 7.53125, | |
| "learning_rate": 7.838420990171927e-07, | |
| "logits/chosen": -1.3769783973693848, | |
| "logits/rejected": -1.217556357383728, | |
| "logps/chosen": -552.2919921875, | |
| "logps/rejected": -631.7188720703125, | |
| "loss": 0.5073, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -2.8292160034179688, | |
| "rewards/margins": 1.050167202949524, | |
| "rewards/rejected": -3.879383087158203, | |
| "step": 2930 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "grad_norm": 9.5, | |
| "learning_rate": 7.673021782854084e-07, | |
| "logits/chosen": -1.2488492727279663, | |
| "logits/rejected": -1.1089154481887817, | |
| "logps/chosen": -549.6131591796875, | |
| "logps/rejected": -629.2005615234375, | |
| "loss": 0.4792, | |
| "rewards/accuracies": 0.7562500238418579, | |
| "rewards/chosen": -2.8892455101013184, | |
| "rewards/margins": 1.214680790901184, | |
| "rewards/rejected": -4.103926658630371, | |
| "step": 2940 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "grad_norm": 11.75, | |
| "learning_rate": 7.509069373231039e-07, | |
| "logits/chosen": -1.259916067123413, | |
| "logits/rejected": -1.1467456817626953, | |
| "logps/chosen": -547.0595092773438, | |
| "logps/rejected": -607.7587280273438, | |
| "loss": 0.5723, | |
| "rewards/accuracies": 0.731249988079071, | |
| "rewards/chosen": -2.9437592029571533, | |
| "rewards/margins": 0.854836106300354, | |
| "rewards/rejected": -3.798595428466797, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "grad_norm": 7.34375, | |
| "learning_rate": 7.346577451281822e-07, | |
| "logits/chosen": -1.275743007659912, | |
| "logits/rejected": -1.1921640634536743, | |
| "logps/chosen": -545.425537109375, | |
| "logps/rejected": -653.1339111328125, | |
| "loss": 0.4519, | |
| "rewards/accuracies": 0.7562500238418579, | |
| "rewards/chosen": -2.832059383392334, | |
| "rewards/margins": 1.3181660175323486, | |
| "rewards/rejected": -4.150225639343262, | |
| "step": 2960 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "grad_norm": 12.25, | |
| "learning_rate": 7.185559585035138e-07, | |
| "logits/chosen": -1.3098807334899902, | |
| "logits/rejected": -1.1533119678497314, | |
| "logps/chosen": -584.9642333984375, | |
| "logps/rejected": -682.4730224609375, | |
| "loss": 0.4797, | |
| "rewards/accuracies": 0.706250011920929, | |
| "rewards/chosen": -3.021878957748413, | |
| "rewards/margins": 1.133847951889038, | |
| "rewards/rejected": -4.155727386474609, | |
| "step": 2970 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "grad_norm": 8.625, | |
| "learning_rate": 7.026029219436504e-07, | |
| "logits/chosen": -1.3365461826324463, | |
| "logits/rejected": -1.1761207580566406, | |
| "logps/chosen": -542.1203002929688, | |
| "logps/rejected": -646.118896484375, | |
| "loss": 0.4723, | |
| "rewards/accuracies": 0.762499988079071, | |
| "rewards/chosen": -2.8982977867126465, | |
| "rewards/margins": 1.1679728031158447, | |
| "rewards/rejected": -4.0662713050842285, | |
| "step": 2980 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "grad_norm": 7.0, | |
| "learning_rate": 6.867999675225523e-07, | |
| "logits/chosen": -1.3771815299987793, | |
| "logits/rejected": -1.2472676038742065, | |
| "logps/chosen": -512.2825317382812, | |
| "logps/rejected": -608.7750854492188, | |
| "loss": 0.487, | |
| "rewards/accuracies": 0.737500011920929, | |
| "rewards/chosen": -2.8136465549468994, | |
| "rewards/margins": 1.1070338487625122, | |
| "rewards/rejected": -3.920680284500122, | |
| "step": 2990 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "grad_norm": 10.6875, | |
| "learning_rate": 6.711484147823663e-07, | |
| "logits/chosen": -1.2860959768295288, | |
| "logits/rejected": -1.2111051082611084, | |
| "logps/chosen": -506.64581298828125, | |
| "logps/rejected": -628.4481811523438, | |
| "loss": 0.4758, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -2.7242074012756348, | |
| "rewards/margins": 1.1695196628570557, | |
| "rewards/rejected": -3.8937268257141113, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "eval_logits/chosen": -1.2526096105575562, | |
| "eval_logits/rejected": -1.1318107843399048, | |
| "eval_logps/chosen": -550.865478515625, | |
| "eval_logps/rejected": -638.7723999023438, | |
| "eval_loss": 0.4866448938846588, | |
| "eval_rewards/accuracies": 0.7409999966621399, | |
| "eval_rewards/chosen": -2.8621349334716797, | |
| "eval_rewards/margins": 1.079501986503601, | |
| "eval_rewards/rejected": -3.9416370391845703, | |
| "eval_runtime": 385.0884, | |
| "eval_samples_per_second": 5.194, | |
| "eval_steps_per_second": 0.649, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "grad_norm": 10.625, | |
| "learning_rate": 6.556495706232413e-07, | |
| "logits/chosen": -1.2896664142608643, | |
| "logits/rejected": -1.1979024410247803, | |
| "logps/chosen": -560.0714721679688, | |
| "logps/rejected": -646.5289916992188, | |
| "loss": 0.5296, | |
| "rewards/accuracies": 0.737500011920929, | |
| "rewards/chosen": -2.9137609004974365, | |
| "rewards/margins": 1.0487867593765259, | |
| "rewards/rejected": -3.9625473022460938, | |
| "step": 3010 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "grad_norm": 9.6875, | |
| "learning_rate": 6.403047291942057e-07, | |
| "logits/chosen": -1.2192307710647583, | |
| "logits/rejected": -1.0712454319000244, | |
| "logps/chosen": -515.818115234375, | |
| "logps/rejected": -601.6507568359375, | |
| "loss": 0.4944, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -2.895503520965576, | |
| "rewards/margins": 1.1022310256958008, | |
| "rewards/rejected": -3.997734785079956, | |
| "step": 3020 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "grad_norm": 12.6875, | |
| "learning_rate": 6.251151717851023e-07, | |
| "logits/chosen": -1.2880637645721436, | |
| "logits/rejected": -1.2091928720474243, | |
| "logps/chosen": -509.5738220214844, | |
| "logps/rejected": -608.5218505859375, | |
| "loss": 0.4853, | |
| "rewards/accuracies": 0.7562500238418579, | |
| "rewards/chosen": -2.7617316246032715, | |
| "rewards/margins": 1.1223886013031006, | |
| "rewards/rejected": -3.884120464324951, | |
| "step": 3030 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "grad_norm": 6.40625, | |
| "learning_rate": 6.100821667196041e-07, | |
| "logits/chosen": -1.4694463014602661, | |
| "logits/rejected": -1.2010104656219482, | |
| "logps/chosen": -551.3878173828125, | |
| "logps/rejected": -589.3790283203125, | |
| "loss": 0.4979, | |
| "rewards/accuracies": 0.762499988079071, | |
| "rewards/chosen": -2.7574946880340576, | |
| "rewards/margins": 1.0347812175750732, | |
| "rewards/rejected": -3.792275905609131, | |
| "step": 3040 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "grad_norm": 29.5, | |
| "learning_rate": 5.952069692493062e-07, | |
| "logits/chosen": -1.2609448432922363, | |
| "logits/rejected": -1.1505969762802124, | |
| "logps/chosen": -498.6568908691406, | |
| "logps/rejected": -627.9306640625, | |
| "loss": 0.4171, | |
| "rewards/accuracies": 0.824999988079071, | |
| "rewards/chosen": -2.703416347503662, | |
| "rewards/margins": 1.2583777904510498, | |
| "rewards/rejected": -3.961793899536133, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "grad_norm": 10.625, | |
| "learning_rate": 5.80490821448918e-07, | |
| "logits/chosen": -1.216658353805542, | |
| "logits/rejected": -1.2167049646377563, | |
| "logps/chosen": -540.7564086914062, | |
| "logps/rejected": -711.563232421875, | |
| "loss": 0.4298, | |
| "rewards/accuracies": 0.793749988079071, | |
| "rewards/chosen": -2.7823424339294434, | |
| "rewards/margins": 1.2834153175354004, | |
| "rewards/rejected": -4.065757751464844, | |
| "step": 3060 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "grad_norm": 9.5625, | |
| "learning_rate": 5.659349521125459e-07, | |
| "logits/chosen": -1.4194704294204712, | |
| "logits/rejected": -1.3601640462875366, | |
| "logps/chosen": -555.782958984375, | |
| "logps/rejected": -634.6406860351562, | |
| "loss": 0.5047, | |
| "rewards/accuracies": 0.7437499761581421, | |
| "rewards/chosen": -2.743645191192627, | |
| "rewards/margins": 0.9945963025093079, | |
| "rewards/rejected": -3.7382407188415527, | |
| "step": 3070 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "grad_norm": 6.4375, | |
| "learning_rate": 5.5154057665109e-07, | |
| "logits/chosen": -1.3637388944625854, | |
| "logits/rejected": -1.216048240661621, | |
| "logps/chosen": -546.4483642578125, | |
| "logps/rejected": -646.1047973632812, | |
| "loss": 0.4807, | |
| "rewards/accuracies": 0.7562500238418579, | |
| "rewards/chosen": -2.855586528778076, | |
| "rewards/margins": 1.2608329057693481, | |
| "rewards/rejected": -4.116419792175293, | |
| "step": 3080 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "grad_norm": 11.8125, | |
| "learning_rate": 5.373088969907586e-07, | |
| "logits/chosen": -1.3931351900100708, | |
| "logits/rejected": -1.2272682189941406, | |
| "logps/chosen": -558.13232421875, | |
| "logps/rejected": -618.197265625, | |
| "loss": 0.4482, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": -2.8131866455078125, | |
| "rewards/margins": 1.1027860641479492, | |
| "rewards/rejected": -3.915972948074341, | |
| "step": 3090 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "grad_norm": 7.53125, | |
| "learning_rate": 5.23241101472709e-07, | |
| "logits/chosen": -1.3162554502487183, | |
| "logits/rejected": -1.1940876245498657, | |
| "logps/chosen": -549.4010009765625, | |
| "logps/rejected": -625.9002075195312, | |
| "loss": 0.4908, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": -2.7309937477111816, | |
| "rewards/margins": 0.9850690960884094, | |
| "rewards/rejected": -3.7160630226135254, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "eval_logits/chosen": -1.2554689645767212, | |
| "eval_logits/rejected": -1.1347445249557495, | |
| "eval_logps/chosen": -549.6837158203125, | |
| "eval_logps/rejected": -638.7192993164062, | |
| "eval_loss": 0.4868563115596771, | |
| "eval_rewards/accuracies": 0.7419999837875366, | |
| "eval_rewards/chosen": -2.8503170013427734, | |
| "eval_rewards/margins": 1.0907903909683228, | |
| "eval_rewards/rejected": -3.9411072731018066, | |
| "eval_runtime": 385.4515, | |
| "eval_samples_per_second": 5.189, | |
| "eval_steps_per_second": 0.649, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "grad_norm": 8.375, | |
| "learning_rate": 5.09338364753818e-07, | |
| "logits/chosen": -1.3838107585906982, | |
| "logits/rejected": -1.2234851121902466, | |
| "logps/chosen": -565.4810791015625, | |
| "logps/rejected": -655.7274169921875, | |
| "loss": 0.5191, | |
| "rewards/accuracies": 0.731249988079071, | |
| "rewards/chosen": -2.800589084625244, | |
| "rewards/margins": 1.0603386163711548, | |
| "rewards/rejected": -3.8609280586242676, | |
| "step": 3110 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "grad_norm": 11.0625, | |
| "learning_rate": 4.956018477086005e-07, | |
| "logits/chosen": -1.3474712371826172, | |
| "logits/rejected": -1.1852939128875732, | |
| "logps/chosen": -559.21142578125, | |
| "logps/rejected": -640.77685546875, | |
| "loss": 0.5116, | |
| "rewards/accuracies": 0.706250011920929, | |
| "rewards/chosen": -2.9007859230041504, | |
| "rewards/margins": 1.0891984701156616, | |
| "rewards/rejected": -3.9899849891662598, | |
| "step": 3120 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "grad_norm": 12.125, | |
| "learning_rate": 4.820326973322764e-07, | |
| "logits/chosen": -1.2560558319091797, | |
| "logits/rejected": -1.1815481185913086, | |
| "logps/chosen": -549.0807495117188, | |
| "logps/rejected": -643.4081420898438, | |
| "loss": 0.5513, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -2.9811716079711914, | |
| "rewards/margins": 1.0034395456314087, | |
| "rewards/rejected": -3.9846110343933105, | |
| "step": 3130 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "grad_norm": 10.5, | |
| "learning_rate": 4.686320466449981e-07, | |
| "logits/chosen": -1.2670228481292725, | |
| "logits/rejected": -1.0823358297348022, | |
| "logps/chosen": -515.7471923828125, | |
| "logps/rejected": -646.492919921875, | |
| "loss": 0.454, | |
| "rewards/accuracies": 0.762499988079071, | |
| "rewards/chosen": -2.7649807929992676, | |
| "rewards/margins": 1.3290727138519287, | |
| "rewards/rejected": -4.094053745269775, | |
| "step": 3140 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "grad_norm": 6.8125, | |
| "learning_rate": 4.554010145972418e-07, | |
| "logits/chosen": -1.4123005867004395, | |
| "logits/rejected": -1.2410565614700317, | |
| "logps/chosen": -551.8477783203125, | |
| "logps/rejected": -645.891357421875, | |
| "loss": 0.5464, | |
| "rewards/accuracies": 0.737500011920929, | |
| "rewards/chosen": -2.880317449569702, | |
| "rewards/margins": 1.0536738634109497, | |
| "rewards/rejected": -3.9339919090270996, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "grad_norm": 8.75, | |
| "learning_rate": 4.4234070597637455e-07, | |
| "logits/chosen": -1.2695270776748657, | |
| "logits/rejected": -1.1814700365066528, | |
| "logps/chosen": -558.7033081054688, | |
| "logps/rejected": -645.794189453125, | |
| "loss": 0.5261, | |
| "rewards/accuracies": 0.7437499761581421, | |
| "rewards/chosen": -2.831700086593628, | |
| "rewards/margins": 0.990101158618927, | |
| "rewards/rejected": -3.8218014240264893, | |
| "step": 3160 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "grad_norm": 6.75, | |
| "learning_rate": 4.2945221131440783e-07, | |
| "logits/chosen": -1.244091272354126, | |
| "logits/rejected": -1.0454550981521606, | |
| "logps/chosen": -539.8818359375, | |
| "logps/rejected": -634.0319213867188, | |
| "loss": 0.43, | |
| "rewards/accuracies": 0.8374999761581421, | |
| "rewards/chosen": -2.7417213916778564, | |
| "rewards/margins": 1.257968544960022, | |
| "rewards/rejected": -3.9996895790100098, | |
| "step": 3170 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "grad_norm": 9.6875, | |
| "learning_rate": 4.167366067969381e-07, | |
| "logits/chosen": -1.3269858360290527, | |
| "logits/rejected": -1.2656229734420776, | |
| "logps/chosen": -505.6949157714844, | |
| "logps/rejected": -628.41015625, | |
| "loss": 0.4885, | |
| "rewards/accuracies": 0.7437499761581421, | |
| "rewards/chosen": -2.827846050262451, | |
| "rewards/margins": 0.9887911677360535, | |
| "rewards/rejected": -3.816636562347412, | |
| "step": 3180 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "grad_norm": 7.0, | |
| "learning_rate": 4.041949541732826e-07, | |
| "logits/chosen": -1.327467441558838, | |
| "logits/rejected": -1.272200584411621, | |
| "logps/chosen": -555.987060546875, | |
| "logps/rejected": -642.5946655273438, | |
| "loss": 0.5129, | |
| "rewards/accuracies": 0.737500011920929, | |
| "rewards/chosen": -2.9210267066955566, | |
| "rewards/margins": 1.0088088512420654, | |
| "rewards/rejected": -3.929835557937622, | |
| "step": 3190 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "grad_norm": 12.1875, | |
| "learning_rate": 3.9182830066782614e-07, | |
| "logits/chosen": -1.2530772686004639, | |
| "logits/rejected": -1.2375959157943726, | |
| "logps/chosen": -542.0306396484375, | |
| "logps/rejected": -671.2916259765625, | |
| "loss": 0.4641, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": -2.8428683280944824, | |
| "rewards/margins": 1.1683650016784668, | |
| "rewards/rejected": -4.011233329772949, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "eval_logits/chosen": -1.2554447650909424, | |
| "eval_logits/rejected": -1.1346678733825684, | |
| "eval_logps/chosen": -545.7666015625, | |
| "eval_logps/rejected": -634.5078735351562, | |
| "eval_loss": 0.48661333322525024, | |
| "eval_rewards/accuracies": 0.7404999732971191, | |
| "eval_rewards/chosen": -2.8111462593078613, | |
| "eval_rewards/margins": 1.0878463983535767, | |
| "eval_rewards/rejected": -3.8989927768707275, | |
| "eval_runtime": 385.3303, | |
| "eval_samples_per_second": 5.19, | |
| "eval_steps_per_second": 0.649, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "grad_norm": 8.375, | |
| "learning_rate": 3.796376788925771e-07, | |
| "logits/chosen": -1.264981985092163, | |
| "logits/rejected": -1.1978137493133545, | |
| "logps/chosen": -532.4588623046875, | |
| "logps/rejected": -602.8772583007812, | |
| "loss": 0.5036, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": -2.7103111743927, | |
| "rewards/margins": 0.944588840007782, | |
| "rewards/rejected": -3.654900074005127, | |
| "step": 3210 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "grad_norm": 6.78125, | |
| "learning_rate": 3.676241067609465e-07, | |
| "logits/chosen": -1.3384299278259277, | |
| "logits/rejected": -1.2301527261734009, | |
| "logps/chosen": -568.9376220703125, | |
| "logps/rejected": -628.9427490234375, | |
| "loss": 0.5105, | |
| "rewards/accuracies": 0.762499988079071, | |
| "rewards/chosen": -2.7523765563964844, | |
| "rewards/margins": 1.0309317111968994, | |
| "rewards/rejected": -3.7833080291748047, | |
| "step": 3220 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "grad_norm": 11.625, | |
| "learning_rate": 3.5578858740274976e-07, | |
| "logits/chosen": -1.2620373964309692, | |
| "logits/rejected": -1.1610171794891357, | |
| "logps/chosen": -548.6265258789062, | |
| "logps/rejected": -628.5254516601562, | |
| "loss": 0.5183, | |
| "rewards/accuracies": 0.7437499761581421, | |
| "rewards/chosen": -2.8689258098602295, | |
| "rewards/margins": 0.9248504638671875, | |
| "rewards/rejected": -3.793776273727417, | |
| "step": 3230 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "grad_norm": 12.0625, | |
| "learning_rate": 3.44132109080447e-07, | |
| "logits/chosen": -1.4505221843719482, | |
| "logits/rejected": -1.2806892395019531, | |
| "logps/chosen": -536.9176025390625, | |
| "logps/rejected": -614.2163696289062, | |
| "loss": 0.4513, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": -2.71694016456604, | |
| "rewards/margins": 1.147782802581787, | |
| "rewards/rejected": -3.864722490310669, | |
| "step": 3240 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "grad_norm": 10.375, | |
| "learning_rate": 3.3265564510662344e-07, | |
| "logits/chosen": -1.377443790435791, | |
| "logits/rejected": -1.2464927434921265, | |
| "logps/chosen": -556.8729858398438, | |
| "logps/rejected": -654.7142333984375, | |
| "loss": 0.4331, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/chosen": -2.6353092193603516, | |
| "rewards/margins": 1.2298697233200073, | |
| "rewards/rejected": -3.8651790618896484, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "grad_norm": 16.0, | |
| "learning_rate": 3.213601537627195e-07, | |
| "logits/chosen": -1.2895920276641846, | |
| "logits/rejected": -1.1866865158081055, | |
| "logps/chosen": -556.0447998046875, | |
| "logps/rejected": -639.7942504882812, | |
| "loss": 0.5502, | |
| "rewards/accuracies": 0.71875, | |
| "rewards/chosen": -2.989259719848633, | |
| "rewards/margins": 1.0085315704345703, | |
| "rewards/rejected": -3.997791290283203, | |
| "step": 3260 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "grad_norm": 12.3125, | |
| "learning_rate": 3.1024657821901063e-07, | |
| "logits/chosen": -1.3556302785873413, | |
| "logits/rejected": -1.2743966579437256, | |
| "logps/chosen": -520.0645141601562, | |
| "logps/rejected": -612.8271484375, | |
| "loss": 0.5058, | |
| "rewards/accuracies": 0.762499988079071, | |
| "rewards/chosen": -2.72322678565979, | |
| "rewards/margins": 1.0860865116119385, | |
| "rewards/rejected": -3.8093135356903076, | |
| "step": 3270 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "grad_norm": 13.625, | |
| "learning_rate": 2.9931584645585654e-07, | |
| "logits/chosen": -1.3034099340438843, | |
| "logits/rejected": -1.271439790725708, | |
| "logps/chosen": -548.0056762695312, | |
| "logps/rejected": -648.6056518554688, | |
| "loss": 0.514, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -2.734912395477295, | |
| "rewards/margins": 0.9768469929695129, | |
| "rewards/rejected": -3.711759090423584, | |
| "step": 3280 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "grad_norm": 5.9375, | |
| "learning_rate": 2.885688711862136e-07, | |
| "logits/chosen": -1.3113230466842651, | |
| "logits/rejected": -1.3101108074188232, | |
| "logps/chosen": -549.2462768554688, | |
| "logps/rejected": -667.9041748046875, | |
| "loss": 0.5153, | |
| "rewards/accuracies": 0.7562500238418579, | |
| "rewards/chosen": -2.8819470405578613, | |
| "rewards/margins": 1.2062867879867554, | |
| "rewards/rejected": -4.0882344245910645, | |
| "step": 3290 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "grad_norm": 12.1875, | |
| "learning_rate": 2.7800654977942486e-07, | |
| "logits/chosen": -1.3042861223220825, | |
| "logits/rejected": -1.1825424432754517, | |
| "logps/chosen": -531.413818359375, | |
| "logps/rejected": -631.4432373046875, | |
| "loss": 0.5096, | |
| "rewards/accuracies": 0.7562500238418579, | |
| "rewards/chosen": -2.740572452545166, | |
| "rewards/margins": 1.0384232997894287, | |
| "rewards/rejected": -3.778996229171753, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "eval_logits/chosen": -1.2585511207580566, | |
| "eval_logits/rejected": -1.1378772258758545, | |
| "eval_logps/chosen": -544.573974609375, | |
| "eval_logps/rejected": -633.404052734375, | |
| "eval_loss": 0.4864084720611572, | |
| "eval_rewards/accuracies": 0.7394999861717224, | |
| "eval_rewards/chosen": -2.7992191314697266, | |
| "eval_rewards/margins": 1.0887356996536255, | |
| "eval_rewards/rejected": -3.8879551887512207, | |
| "eval_runtime": 385.2344, | |
| "eval_samples_per_second": 5.192, | |
| "eval_steps_per_second": 0.649, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "grad_norm": 15.8125, | |
| "learning_rate": 2.6762976418628797e-07, | |
| "logits/chosen": -1.345733880996704, | |
| "logits/rejected": -1.2021456956863403, | |
| "logps/chosen": -504.2335510253906, | |
| "logps/rejected": -557.3888549804688, | |
| "loss": 0.5433, | |
| "rewards/accuracies": 0.706250011920929, | |
| "rewards/chosen": -2.744576930999756, | |
| "rewards/margins": 0.9676315188407898, | |
| "rewards/rejected": -3.7122085094451904, | |
| "step": 3310 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "grad_norm": 10.25, | |
| "learning_rate": 2.5743938086541354e-07, | |
| "logits/chosen": -1.3191635608673096, | |
| "logits/rejected": -1.1912063360214233, | |
| "logps/chosen": -537.1017456054688, | |
| "logps/rejected": -623.8556518554688, | |
| "loss": 0.4854, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -2.728193998336792, | |
| "rewards/margins": 1.1158511638641357, | |
| "rewards/rejected": -3.8440451622009277, | |
| "step": 3320 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "grad_norm": 12.5625, | |
| "learning_rate": 2.4743625071087574e-07, | |
| "logits/chosen": -1.4545891284942627, | |
| "logits/rejected": -1.2835543155670166, | |
| "logps/chosen": -547.1002807617188, | |
| "logps/rejected": -641.8110961914062, | |
| "loss": 0.481, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -2.6952996253967285, | |
| "rewards/margins": 1.241824746131897, | |
| "rewards/rejected": -3.937124252319336, | |
| "step": 3330 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "grad_norm": 11.9375, | |
| "learning_rate": 2.3762120898116498e-07, | |
| "logits/chosen": -1.3351377248764038, | |
| "logits/rejected": -1.2292808294296265, | |
| "logps/chosen": -560.9166259765625, | |
| "logps/rejected": -651.31103515625, | |
| "loss": 0.4951, | |
| "rewards/accuracies": 0.768750011920929, | |
| "rewards/chosen": -2.9212374687194824, | |
| "rewards/margins": 0.9840442538261414, | |
| "rewards/rejected": -3.9052817821502686, | |
| "step": 3340 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "grad_norm": 9.25, | |
| "learning_rate": 2.2799507522944048e-07, | |
| "logits/chosen": -1.2632300853729248, | |
| "logits/rejected": -1.1798118352890015, | |
| "logps/chosen": -535.8150634765625, | |
| "logps/rejected": -648.6831665039062, | |
| "loss": 0.4501, | |
| "rewards/accuracies": 0.762499988079071, | |
| "rewards/chosen": -2.7001757621765137, | |
| "rewards/margins": 1.211038589477539, | |
| "rewards/rejected": -3.9112143516540527, | |
| "step": 3350 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "grad_norm": 9.5625, | |
| "learning_rate": 2.1855865323510056e-07, | |
| "logits/chosen": -1.3267244100570679, | |
| "logits/rejected": -1.1418159008026123, | |
| "logps/chosen": -551.0586547851562, | |
| "logps/rejected": -682.3759765625, | |
| "loss": 0.4332, | |
| "rewards/accuracies": 0.824999988079071, | |
| "rewards/chosen": -2.783801317214966, | |
| "rewards/margins": 1.3722645044326782, | |
| "rewards/rejected": -4.156065940856934, | |
| "step": 3360 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "grad_norm": 6.84375, | |
| "learning_rate": 2.0931273093666575e-07, | |
| "logits/chosen": -1.2914237976074219, | |
| "logits/rejected": -1.1433568000793457, | |
| "logps/chosen": -527.1336669921875, | |
| "logps/rejected": -625.6705322265625, | |
| "loss": 0.4545, | |
| "rewards/accuracies": 0.8062499761581421, | |
| "rewards/chosen": -2.8694703578948975, | |
| "rewards/margins": 1.1879098415374756, | |
| "rewards/rejected": -4.057379722595215, | |
| "step": 3370 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "grad_norm": 12.1875, | |
| "learning_rate": 2.002580803659873e-07, | |
| "logits/chosen": -1.2892788648605347, | |
| "logits/rejected": -1.1720714569091797, | |
| "logps/chosen": -541.9439697265625, | |
| "logps/rejected": -635.7294311523438, | |
| "loss": 0.4668, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/chosen": -2.873889446258545, | |
| "rewards/margins": 1.1193937063217163, | |
| "rewards/rejected": -3.9932830333709717, | |
| "step": 3380 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "grad_norm": 5.5625, | |
| "learning_rate": 1.913954575837826e-07, | |
| "logits/chosen": -1.3597743511199951, | |
| "logits/rejected": -1.1029024124145508, | |
| "logps/chosen": -555.9939575195312, | |
| "logps/rejected": -613.6519775390625, | |
| "loss": 0.4753, | |
| "rewards/accuracies": 0.793749988079071, | |
| "rewards/chosen": -2.8387951850891113, | |
| "rewards/margins": 1.0735225677490234, | |
| "rewards/rejected": -3.9123177528381348, | |
| "step": 3390 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "grad_norm": 8.875, | |
| "learning_rate": 1.827256026165028e-07, | |
| "logits/chosen": -1.37373685836792, | |
| "logits/rejected": -1.178899884223938, | |
| "logps/chosen": -578.7473754882812, | |
| "logps/rejected": -641.29345703125, | |
| "loss": 0.455, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/chosen": -2.636767864227295, | |
| "rewards/margins": 1.1961476802825928, | |
| "rewards/rejected": -3.8329155445098877, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "eval_logits/chosen": -1.2543540000915527, | |
| "eval_logits/rejected": -1.1335822343826294, | |
| "eval_logps/chosen": -545.915283203125, | |
| "eval_logps/rejected": -635.4321899414062, | |
| "eval_loss": 0.48658648133277893, | |
| "eval_rewards/accuracies": 0.7394999861717224, | |
| "eval_rewards/chosen": -2.8126325607299805, | |
| "eval_rewards/margins": 1.0956026315689087, | |
| "eval_rewards/rejected": -3.9082350730895996, | |
| "eval_runtime": 385.1178, | |
| "eval_samples_per_second": 5.193, | |
| "eval_steps_per_second": 0.649, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "grad_norm": 14.25, | |
| "learning_rate": 1.7424923939454274e-07, | |
| "logits/chosen": -1.3175909519195557, | |
| "logits/rejected": -1.1420743465423584, | |
| "logps/chosen": -559.6712036132812, | |
| "logps/rejected": -638.4949951171875, | |
| "loss": 0.4204, | |
| "rewards/accuracies": 0.8125, | |
| "rewards/chosen": -2.7832179069519043, | |
| "rewards/margins": 1.2219712734222412, | |
| "rewards/rejected": -4.005189418792725, | |
| "step": 3410 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "grad_norm": 18.25, | |
| "learning_rate": 1.6596707569179304e-07, | |
| "logits/chosen": -1.4023295640945435, | |
| "logits/rejected": -1.2579714059829712, | |
| "logps/chosen": -565.6453857421875, | |
| "logps/rejected": -633.6785278320312, | |
| "loss": 0.5107, | |
| "rewards/accuracies": 0.768750011920929, | |
| "rewards/chosen": -2.844580888748169, | |
| "rewards/margins": 1.0315876007080078, | |
| "rewards/rejected": -3.8761680126190186, | |
| "step": 3420 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "grad_norm": 9.3125, | |
| "learning_rate": 1.578798030665385e-07, | |
| "logits/chosen": -1.3531277179718018, | |
| "logits/rejected": -1.1701006889343262, | |
| "logps/chosen": -551.0891723632812, | |
| "logps/rejected": -663.329345703125, | |
| "loss": 0.4451, | |
| "rewards/accuracies": 0.7749999761581421, | |
| "rewards/chosen": -2.7639107704162598, | |
| "rewards/margins": 1.2928920984268188, | |
| "rewards/rejected": -4.056802749633789, | |
| "step": 3430 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "grad_norm": 8.625, | |
| "learning_rate": 1.499880968037165e-07, | |
| "logits/chosen": -1.3360685110092163, | |
| "logits/rejected": -1.204347014427185, | |
| "logps/chosen": -529.5865478515625, | |
| "logps/rejected": -599.6653442382812, | |
| "loss": 0.5141, | |
| "rewards/accuracies": 0.7562500238418579, | |
| "rewards/chosen": -2.7397305965423584, | |
| "rewards/margins": 1.0634615421295166, | |
| "rewards/rejected": -3.803192138671875, | |
| "step": 3440 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "grad_norm": 14.5, | |
| "learning_rate": 1.4229261585852805e-07, | |
| "logits/chosen": -1.3513076305389404, | |
| "logits/rejected": -1.2741743326187134, | |
| "logps/chosen": -541.47216796875, | |
| "logps/rejected": -626.5045776367188, | |
| "loss": 0.457, | |
| "rewards/accuracies": 0.762499988079071, | |
| "rewards/chosen": -2.730405807495117, | |
| "rewards/margins": 1.1085281372070312, | |
| "rewards/rejected": -3.8389339447021484, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "grad_norm": 10.0625, | |
| "learning_rate": 1.3479400280141886e-07, | |
| "logits/chosen": -1.2767086029052734, | |
| "logits/rejected": -1.2311673164367676, | |
| "logps/chosen": -530.0853881835938, | |
| "logps/rejected": -643.3887939453125, | |
| "loss": 0.4882, | |
| "rewards/accuracies": 0.71875, | |
| "rewards/chosen": -2.8382620811462402, | |
| "rewards/margins": 1.153211236000061, | |
| "rewards/rejected": -3.9914729595184326, | |
| "step": 3460 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "grad_norm": 9.375, | |
| "learning_rate": 1.2749288376442044e-07, | |
| "logits/chosen": -1.3543965816497803, | |
| "logits/rejected": -1.169668436050415, | |
| "logps/chosen": -568.0045166015625, | |
| "logps/rejected": -621.7847900390625, | |
| "loss": 0.4688, | |
| "rewards/accuracies": 0.793749988079071, | |
| "rewards/chosen": -2.714702606201172, | |
| "rewards/margins": 1.1129640340805054, | |
| "rewards/rejected": -3.827666759490967, | |
| "step": 3470 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "grad_norm": 9.8125, | |
| "learning_rate": 1.203898683888713e-07, | |
| "logits/chosen": -1.3755584955215454, | |
| "logits/rejected": -1.2311782836914062, | |
| "logps/chosen": -532.9498901367188, | |
| "logps/rejected": -625.6002197265625, | |
| "loss": 0.5499, | |
| "rewards/accuracies": 0.6937500238418579, | |
| "rewards/chosen": -2.8895249366760254, | |
| "rewards/margins": 0.9575474858283997, | |
| "rewards/rejected": -3.8470726013183594, | |
| "step": 3480 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "grad_norm": 9.75, | |
| "learning_rate": 1.1348554977451132e-07, | |
| "logits/chosen": -1.4002835750579834, | |
| "logits/rejected": -1.255327820777893, | |
| "logps/chosen": -557.9288330078125, | |
| "logps/rejected": -629.8707275390625, | |
| "loss": 0.5076, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": -2.759397029876709, | |
| "rewards/margins": 1.0484449863433838, | |
| "rewards/rejected": -3.8078417778015137, | |
| "step": 3490 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "grad_norm": 6.75, | |
| "learning_rate": 1.0678050442995802e-07, | |
| "logits/chosen": -1.345915675163269, | |
| "logits/rejected": -1.1581257581710815, | |
| "logps/chosen": -561.4415283203125, | |
| "logps/rejected": -623.0227661132812, | |
| "loss": 0.5262, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -2.8064818382263184, | |
| "rewards/margins": 1.0665854215621948, | |
| "rewards/rejected": -3.8730673789978027, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "eval_logits/chosen": -1.255007028579712, | |
| "eval_logits/rejected": -1.1342185735702515, | |
| "eval_logps/chosen": -545.7534790039062, | |
| "eval_logps/rejected": -635.4207153320312, | |
| "eval_loss": 0.4864389896392822, | |
| "eval_rewards/accuracies": 0.7409999966621399, | |
| "eval_rewards/chosen": -2.8110146522521973, | |
| "eval_rewards/margins": 1.0971060991287231, | |
| "eval_rewards/rejected": -3.908120632171631, | |
| "eval_runtime": 385.1023, | |
| "eval_samples_per_second": 5.193, | |
| "eval_steps_per_second": 0.649, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "grad_norm": 9.0, | |
| "learning_rate": 1.0027529222456755e-07, | |
| "logits/chosen": -1.3322703838348389, | |
| "logits/rejected": -1.164650321006775, | |
| "logps/chosen": -529.0792236328125, | |
| "logps/rejected": -624.087646484375, | |
| "loss": 0.4498, | |
| "rewards/accuracies": 0.8125, | |
| "rewards/chosen": -2.7491297721862793, | |
| "rewards/margins": 1.125643014907837, | |
| "rewards/rejected": -3.8747730255126953, | |
| "step": 3510 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "grad_norm": 8.375, | |
| "learning_rate": 9.397045634168766e-08, | |
| "logits/chosen": -1.36007821559906, | |
| "logits/rejected": -1.2929136753082275, | |
| "logps/chosen": -542.2086181640625, | |
| "logps/rejected": -667.2059326171875, | |
| "loss": 0.4547, | |
| "rewards/accuracies": 0.768750011920929, | |
| "rewards/chosen": -2.736257553100586, | |
| "rewards/margins": 1.2475742101669312, | |
| "rewards/rejected": -3.9838318824768066, | |
| "step": 3520 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "grad_norm": 11.625, | |
| "learning_rate": 8.78665232332998e-08, | |
| "logits/chosen": -1.2781856060028076, | |
| "logits/rejected": -1.210409164428711, | |
| "logps/chosen": -516.8308715820312, | |
| "logps/rejected": -617.1106567382812, | |
| "loss": 0.4795, | |
| "rewards/accuracies": 0.737500011920929, | |
| "rewards/chosen": -2.8612074851989746, | |
| "rewards/margins": 1.0104413032531738, | |
| "rewards/rejected": -3.8716487884521484, | |
| "step": 3530 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "grad_norm": 9.375, | |
| "learning_rate": 8.196400257606208e-08, | |
| "logits/chosen": -1.385122537612915, | |
| "logits/rejected": -1.24273681640625, | |
| "logps/chosen": -560.0970458984375, | |
| "logps/rejected": -682.8623046875, | |
| "loss": 0.4425, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/chosen": -2.7701196670532227, | |
| "rewards/margins": 1.2804229259490967, | |
| "rewards/rejected": -4.05054235458374, | |
| "step": 3540 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "grad_norm": 9.9375, | |
| "learning_rate": 7.626338722875076e-08, | |
| "logits/chosen": -1.3233528137207031, | |
| "logits/rejected": -1.269012212753296, | |
| "logps/chosen": -527.5838623046875, | |
| "logps/rejected": -637.5836791992188, | |
| "loss": 0.4828, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -2.7349019050598145, | |
| "rewards/margins": 1.0903024673461914, | |
| "rewards/rejected": -3.8252041339874268, | |
| "step": 3550 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "grad_norm": 7.03125, | |
| "learning_rate": 7.076515319110688e-08, | |
| "logits/chosen": -1.3301162719726562, | |
| "logits/rejected": -1.2381629943847656, | |
| "logps/chosen": -530.2079467773438, | |
| "logps/rejected": -606.7450561523438, | |
| "loss": 0.5044, | |
| "rewards/accuracies": 0.768750011920929, | |
| "rewards/chosen": -2.739201545715332, | |
| "rewards/margins": 1.1811503171920776, | |
| "rewards/rejected": -3.9203522205352783, | |
| "step": 3560 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "grad_norm": 7.90625, | |
| "learning_rate": 6.54697595640899e-08, | |
| "logits/chosen": -1.3412398099899292, | |
| "logits/rejected": -1.2113425731658936, | |
| "logps/chosen": -574.6760864257812, | |
| "logps/rejected": -662.1055908203125, | |
| "loss": 0.4814, | |
| "rewards/accuracies": 0.762499988079071, | |
| "rewards/chosen": -2.8245387077331543, | |
| "rewards/margins": 1.1259464025497437, | |
| "rewards/rejected": -3.9504852294921875, | |
| "step": 3570 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "grad_norm": 10.125, | |
| "learning_rate": 6.037764851154426e-08, | |
| "logits/chosen": -1.3283928632736206, | |
| "logits/rejected": -1.286163568496704, | |
| "logps/chosen": -535.401611328125, | |
| "logps/rejected": -654.7335205078125, | |
| "loss": 0.4822, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -2.7134242057800293, | |
| "rewards/margins": 1.1457810401916504, | |
| "rewards/rejected": -3.8592045307159424, | |
| "step": 3580 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "grad_norm": 6.9375, | |
| "learning_rate": 5.548924522327748e-08, | |
| "logits/chosen": -1.3209599256515503, | |
| "logits/rejected": -1.182340383529663, | |
| "logps/chosen": -537.6961669921875, | |
| "logps/rejected": -631.2122192382812, | |
| "loss": 0.4825, | |
| "rewards/accuracies": 0.731249988079071, | |
| "rewards/chosen": -2.7593882083892822, | |
| "rewards/margins": 1.081923484802246, | |
| "rewards/rejected": -3.8413116931915283, | |
| "step": 3590 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "grad_norm": 10.9375, | |
| "learning_rate": 5.0804957879556915e-08, | |
| "logits/chosen": -1.249495506286621, | |
| "logits/rejected": -1.1639585494995117, | |
| "logps/chosen": -500.98992919921875, | |
| "logps/rejected": -613.1359252929688, | |
| "loss": 0.466, | |
| "rewards/accuracies": 0.7749999761581421, | |
| "rewards/chosen": -2.7542366981506348, | |
| "rewards/margins": 1.0757157802581787, | |
| "rewards/rejected": -3.8299522399902344, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "eval_logits/chosen": -1.2554669380187988, | |
| "eval_logits/rejected": -1.1347417831420898, | |
| "eval_logps/chosen": -545.9835815429688, | |
| "eval_logps/rejected": -635.6727294921875, | |
| "eval_loss": 0.48658978939056396, | |
| "eval_rewards/accuracies": 0.7400000095367432, | |
| "eval_rewards/chosen": -2.813314914703369, | |
| "eval_rewards/margins": 1.0973262786865234, | |
| "eval_rewards/rejected": -3.9106414318084717, | |
| "eval_runtime": 385.0907, | |
| "eval_samples_per_second": 5.194, | |
| "eval_steps_per_second": 0.649, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "grad_norm": 10.1875, | |
| "learning_rate": 4.632517761702815e-08, | |
| "logits/chosen": -1.2666916847229004, | |
| "logits/rejected": -1.127403974533081, | |
| "logps/chosen": -518.2024536132812, | |
| "logps/rejected": -636.0374145507812, | |
| "loss": 0.4363, | |
| "rewards/accuracies": 0.7875000238418579, | |
| "rewards/chosen": -2.827592134475708, | |
| "rewards/margins": 1.3016375303268433, | |
| "rewards/rejected": -4.129229545593262, | |
| "step": 3610 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "grad_norm": 12.5, | |
| "learning_rate": 4.205027849605359e-08, | |
| "logits/chosen": -1.2991037368774414, | |
| "logits/rejected": -1.2022724151611328, | |
| "logps/chosen": -558.1617431640625, | |
| "logps/rejected": -612.0789794921875, | |
| "loss": 0.68, | |
| "rewards/accuracies": 0.7437499761581421, | |
| "rewards/chosen": -3.1023831367492676, | |
| "rewards/margins": 0.8397830128669739, | |
| "rewards/rejected": -3.9421660900115967, | |
| "step": 3620 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "grad_norm": 9.375, | |
| "learning_rate": 3.798061746947995e-08, | |
| "logits/chosen": -1.4298536777496338, | |
| "logits/rejected": -1.2711670398712158, | |
| "logps/chosen": -541.1964721679688, | |
| "logps/rejected": -615.2937622070312, | |
| "loss": 0.479, | |
| "rewards/accuracies": 0.71875, | |
| "rewards/chosen": -2.770465135574341, | |
| "rewards/margins": 1.1328895092010498, | |
| "rewards/rejected": -3.9033546447753906, | |
| "step": 3630 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "grad_norm": 10.9375, | |
| "learning_rate": 3.411653435283158e-08, | |
| "logits/chosen": -1.3373726606369019, | |
| "logits/rejected": -1.1359134912490845, | |
| "logps/chosen": -546.8034057617188, | |
| "logps/rejected": -593.7394409179688, | |
| "loss": 0.4962, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -2.7354676723480225, | |
| "rewards/margins": 1.0266190767288208, | |
| "rewards/rejected": -3.762086868286133, | |
| "step": 3640 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "grad_norm": 7.03125, | |
| "learning_rate": 3.04583517959367e-08, | |
| "logits/chosen": -1.3844215869903564, | |
| "logits/rejected": -1.2332738637924194, | |
| "logps/chosen": -517.3903198242188, | |
| "logps/rejected": -597.7869262695312, | |
| "loss": 0.4532, | |
| "rewards/accuracies": 0.7562500238418579, | |
| "rewards/chosen": -2.6527369022369385, | |
| "rewards/margins": 1.128391146659851, | |
| "rewards/rejected": -3.7811279296875, | |
| "step": 3650 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "grad_norm": 9.0625, | |
| "learning_rate": 2.7006375255985984e-08, | |
| "logits/chosen": -1.3098169565200806, | |
| "logits/rejected": -1.2761331796646118, | |
| "logps/chosen": -556.2361450195312, | |
| "logps/rejected": -640.65283203125, | |
| "loss": 0.5747, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": -2.914341449737549, | |
| "rewards/margins": 0.838718056678772, | |
| "rewards/rejected": -3.753058910369873, | |
| "step": 3660 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "grad_norm": 10.1875, | |
| "learning_rate": 2.3760892972027328e-08, | |
| "logits/chosen": -1.4296592473983765, | |
| "logits/rejected": -1.253159761428833, | |
| "logps/chosen": -565.5921020507812, | |
| "logps/rejected": -640.4884643554688, | |
| "loss": 0.5445, | |
| "rewards/accuracies": 0.731249988079071, | |
| "rewards/chosen": -2.93088960647583, | |
| "rewards/margins": 1.0891053676605225, | |
| "rewards/rejected": -4.01999568939209, | |
| "step": 3670 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "grad_norm": 11.875, | |
| "learning_rate": 2.072217594089765e-08, | |
| "logits/chosen": -1.2928217649459839, | |
| "logits/rejected": -1.2739886045455933, | |
| "logps/chosen": -544.60205078125, | |
| "logps/rejected": -657.6286010742188, | |
| "loss": 0.4198, | |
| "rewards/accuracies": 0.824999988079071, | |
| "rewards/chosen": -2.8306102752685547, | |
| "rewards/margins": 1.2527996301651, | |
| "rewards/rejected": -4.083409786224365, | |
| "step": 3680 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "grad_norm": 8.8125, | |
| "learning_rate": 1.789047789459375e-08, | |
| "logits/chosen": -1.3845082521438599, | |
| "logits/rejected": -1.1975808143615723, | |
| "logps/chosen": -600.7689819335938, | |
| "logps/rejected": -661.3809204101562, | |
| "loss": 0.5254, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": -2.837923049926758, | |
| "rewards/margins": 1.0966850519180298, | |
| "rewards/rejected": -3.934607744216919, | |
| "step": 3690 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "grad_norm": 6.875, | |
| "learning_rate": 1.5266035279088708e-08, | |
| "logits/chosen": -1.2211766242980957, | |
| "logits/rejected": -1.0900758504867554, | |
| "logps/chosen": -589.4644775390625, | |
| "logps/rejected": -672.6224975585938, | |
| "loss": 0.4945, | |
| "rewards/accuracies": 0.762499988079071, | |
| "rewards/chosen": -2.928469181060791, | |
| "rewards/margins": 1.0935529470443726, | |
| "rewards/rejected": -4.022022247314453, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "eval_logits/chosen": -1.252778172492981, | |
| "eval_logits/rejected": -1.1321126222610474, | |
| "eval_logps/chosen": -545.6665649414062, | |
| "eval_logps/rejected": -635.412353515625, | |
| "eval_loss": 0.486397385597229, | |
| "eval_rewards/accuracies": 0.7400000095367432, | |
| "eval_rewards/chosen": -2.8101449012756348, | |
| "eval_rewards/margins": 1.0978920459747314, | |
| "eval_rewards/rejected": -3.9080374240875244, | |
| "eval_runtime": 385.1334, | |
| "eval_samples_per_second": 5.193, | |
| "eval_steps_per_second": 0.649, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "grad_norm": 12.1875, | |
| "learning_rate": 1.2849067234584623e-08, | |
| "logits/chosen": -1.2232288122177124, | |
| "logits/rejected": -1.1615407466888428, | |
| "logps/chosen": -513.3945922851562, | |
| "logps/rejected": -625.0435791015625, | |
| "loss": 0.4715, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -2.7703394889831543, | |
| "rewards/margins": 1.192608118057251, | |
| "rewards/rejected": -3.9629478454589844, | |
| "step": 3710 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "grad_norm": 11.1875, | |
| "learning_rate": 1.0639775577218625e-08, | |
| "logits/chosen": -1.2154030799865723, | |
| "logits/rejected": -1.0470209121704102, | |
| "logps/chosen": -534.564453125, | |
| "logps/rejected": -611.8311767578125, | |
| "loss": 0.5179, | |
| "rewards/accuracies": 0.7562500238418579, | |
| "rewards/chosen": -2.8537747859954834, | |
| "rewards/margins": 1.1320233345031738, | |
| "rewards/rejected": -3.985797882080078, | |
| "step": 3720 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "grad_norm": 10.3125, | |
| "learning_rate": 8.638344782207486e-09, | |
| "logits/chosen": -1.2473368644714355, | |
| "logits/rejected": -1.1350939273834229, | |
| "logps/chosen": -516.2252197265625, | |
| "logps/rejected": -601.020263671875, | |
| "loss": 0.4856, | |
| "rewards/accuracies": 0.737500011920929, | |
| "rewards/chosen": -2.709373950958252, | |
| "rewards/margins": 1.07589852809906, | |
| "rewards/rejected": -3.7852725982666016, | |
| "step": 3730 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "grad_norm": 9.3125, | |
| "learning_rate": 6.84494196844715e-09, | |
| "logits/chosen": -1.2988349199295044, | |
| "logits/rejected": -1.1963183879852295, | |
| "logps/chosen": -549.7848510742188, | |
| "logps/rejected": -670.5635986328125, | |
| "loss": 0.4567, | |
| "rewards/accuracies": 0.737500011920929, | |
| "rewards/chosen": -2.7767837047576904, | |
| "rewards/margins": 1.3236409425735474, | |
| "rewards/rejected": -4.100424766540527, | |
| "step": 3740 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "grad_norm": 7.96875, | |
| "learning_rate": 5.259716884556121e-09, | |
| "logits/chosen": -1.3606340885162354, | |
| "logits/rejected": -1.224469780921936, | |
| "logps/chosen": -543.1236572265625, | |
| "logps/rejected": -640.4927978515625, | |
| "loss": 0.4694, | |
| "rewards/accuracies": 0.7875000238418579, | |
| "rewards/chosen": -2.7865681648254395, | |
| "rewards/margins": 1.1219072341918945, | |
| "rewards/rejected": -3.908475399017334, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "grad_norm": 9.0, | |
| "learning_rate": 3.882801896372967e-09, | |
| "logits/chosen": -1.3460079431533813, | |
| "logits/rejected": -1.2832306623458862, | |
| "logps/chosen": -539.73583984375, | |
| "logps/rejected": -619.1431884765625, | |
| "loss": 0.4913, | |
| "rewards/accuracies": 0.7437499761581421, | |
| "rewards/chosen": -2.7437405586242676, | |
| "rewards/margins": 1.1124091148376465, | |
| "rewards/rejected": -3.8561501502990723, | |
| "step": 3760 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "grad_norm": 12.8125, | |
| "learning_rate": 2.7143119759026614e-09, | |
| "logits/chosen": -1.3690940141677856, | |
| "logits/rejected": -1.1916528940200806, | |
| "logps/chosen": -560.9449462890625, | |
| "logps/rejected": -645.1325073242188, | |
| "loss": 0.4329, | |
| "rewards/accuracies": 0.7875000238418579, | |
| "rewards/chosen": -2.7667250633239746, | |
| "rewards/margins": 1.096879482269287, | |
| "rewards/rejected": -3.8636043071746826, | |
| "step": 3770 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "grad_norm": 7.65625, | |
| "learning_rate": 1.754344691717591e-09, | |
| "logits/chosen": -1.2690956592559814, | |
| "logits/rejected": -1.2165257930755615, | |
| "logps/chosen": -535.2860107421875, | |
| "logps/rejected": -644.5172119140625, | |
| "loss": 0.5302, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -2.8549444675445557, | |
| "rewards/margins": 0.8715957403182983, | |
| "rewards/rejected": -3.7265400886535645, | |
| "step": 3780 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "grad_norm": 10.9375, | |
| "learning_rate": 1.0029802008096335e-09, | |
| "logits/chosen": -1.2903029918670654, | |
| "logits/rejected": -1.1491575241088867, | |
| "logps/chosen": -556.7200927734375, | |
| "logps/rejected": -649.4456176757812, | |
| "loss": 0.4709, | |
| "rewards/accuracies": 0.737500011920929, | |
| "rewards/chosen": -2.824282169342041, | |
| "rewards/margins": 1.1551063060760498, | |
| "rewards/rejected": -3.979388475418091, | |
| "step": 3790 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "grad_norm": 9.625, | |
| "learning_rate": 4.602812418974534e-10, | |
| "logits/chosen": -1.3866922855377197, | |
| "logits/rejected": -1.2608470916748047, | |
| "logps/chosen": -567.4412231445312, | |
| "logps/rejected": -653.213623046875, | |
| "loss": 0.5013, | |
| "rewards/accuracies": 0.731249988079071, | |
| "rewards/chosen": -2.856003999710083, | |
| "rewards/margins": 1.1031793355941772, | |
| "rewards/rejected": -3.9591832160949707, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "eval_logits/chosen": -1.2524324655532837, | |
| "eval_logits/rejected": -1.1317205429077148, | |
| "eval_logps/chosen": -545.9131469726562, | |
| "eval_logps/rejected": -635.618408203125, | |
| "eval_loss": 0.48637571930885315, | |
| "eval_rewards/accuracies": 0.7394999861717224, | |
| "eval_rewards/chosen": -2.8126115798950195, | |
| "eval_rewards/margins": 1.0974864959716797, | |
| "eval_rewards/rejected": -3.910098075866699, | |
| "eval_runtime": 385.0016, | |
| "eval_samples_per_second": 5.195, | |
| "eval_steps_per_second": 0.649, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 10.4375, | |
| "learning_rate": 1.2629313018819312e-10, | |
| "logits/chosen": -1.312417984008789, | |
| "logits/rejected": -1.191304326057434, | |
| "logps/chosen": -529.3787231445312, | |
| "logps/rejected": -611.1173095703125, | |
| "loss": 0.5046, | |
| "rewards/accuracies": 0.7562500238418579, | |
| "rewards/chosen": -2.7690625190734863, | |
| "rewards/margins": 0.9739207029342651, | |
| "rewards/rejected": -3.742983341217041, | |
| "step": 3810 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 21.25, | |
| "learning_rate": 1.0437535929996855e-12, | |
| "logits/chosen": -1.306217908859253, | |
| "logits/rejected": -1.1442514657974243, | |
| "logps/chosen": -570.4493408203125, | |
| "logps/rejected": -660.94970703125, | |
| "loss": 0.4569, | |
| "rewards/accuracies": 0.7875000238418579, | |
| "rewards/chosen": -2.836827516555786, | |
| "rewards/margins": 1.3515799045562744, | |
| "rewards/rejected": -4.188406944274902, | |
| "step": 3820 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "step": 3821, | |
| "total_flos": 0.0, | |
| "train_loss": 0.5238101308459981, | |
| "train_runtime": 42749.2467, | |
| "train_samples_per_second": 1.43, | |
| "train_steps_per_second": 0.089 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 3821, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 100, | |
| "total_flos": 0.0, | |
| "train_batch_size": 4, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |