| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.9968, |
| "eval_steps": 100, |
| "global_step": 468, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.004266666666666667, |
| "grad_norm": 90.38055419921875, |
| "learning_rate": 1.0638297872340425e-08, |
| "logits/chosen": -0.26953125, |
| "logits/rejected": -0.259765625, |
| "logps/chosen": -304.0, |
| "logps/rejected": -268.0, |
| "loss": 44.25, |
| "rewards/accuracies": 0.0, |
| "rewards/chosen": 0.0, |
| "rewards/margins": 0.0, |
| "rewards/rejected": 0.0, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.042666666666666665, |
| "grad_norm": 87.35566711425781, |
| "learning_rate": 1.0638297872340425e-07, |
| "logits/chosen": -0.294921875, |
| "logits/rejected": -0.30078125, |
| "logps/chosen": -286.0, |
| "logps/rejected": -288.0, |
| "loss": 44.3997, |
| "rewards/accuracies": 0.2274305522441864, |
| "rewards/chosen": -0.000881195068359375, |
| "rewards/margins": -0.0021514892578125, |
| "rewards/rejected": 0.0012664794921875, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.08533333333333333, |
| "grad_norm": 85.34680938720703, |
| "learning_rate": 2.127659574468085e-07, |
| "logits/chosen": -0.29296875, |
| "logits/rejected": -0.29296875, |
| "logps/chosen": -292.0, |
| "logps/rejected": -300.0, |
| "loss": 44.3934, |
| "rewards/accuracies": 0.22031250596046448, |
| "rewards/chosen": 0.0010833740234375, |
| "rewards/margins": -0.0021209716796875, |
| "rewards/rejected": 0.0031890869140625, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.128, |
| "grad_norm": 82.10749816894531, |
| "learning_rate": 3.1914893617021275e-07, |
| "logits/chosen": -0.2890625, |
| "logits/rejected": -0.271484375, |
| "logps/chosen": -302.0, |
| "logps/rejected": -310.0, |
| "loss": 44.441, |
| "rewards/accuracies": 0.2109375, |
| "rewards/chosen": -0.00244140625, |
| "rewards/margins": -0.0030670166015625, |
| "rewards/rejected": 0.0006256103515625, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.17066666666666666, |
| "grad_norm": 81.98179626464844, |
| "learning_rate": 4.25531914893617e-07, |
| "logits/chosen": -0.29296875, |
| "logits/rejected": -0.283203125, |
| "logps/chosen": -288.0, |
| "logps/rejected": -316.0, |
| "loss": 44.3387, |
| "rewards/accuracies": 0.23906250298023224, |
| "rewards/chosen": 0.000263214111328125, |
| "rewards/margins": -0.000209808349609375, |
| "rewards/rejected": 0.0004749298095703125, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.21333333333333335, |
| "grad_norm": 87.4677963256836, |
| "learning_rate": 4.999373573764186e-07, |
| "logits/chosen": -0.291015625, |
| "logits/rejected": -0.28515625, |
| "logps/chosen": -298.0, |
| "logps/rejected": -310.0, |
| "loss": 44.3051, |
| "rewards/accuracies": 0.22812500596046448, |
| "rewards/chosen": 0.0003147125244140625, |
| "rewards/margins": 0.0011749267578125, |
| "rewards/rejected": -0.0008544921875, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.256, |
| "grad_norm": 86.16796875, |
| "learning_rate": 4.988245838331339e-07, |
| "logits/chosen": -0.279296875, |
| "logits/rejected": -0.287109375, |
| "logps/chosen": -294.0, |
| "logps/rejected": -304.0, |
| "loss": 44.3219, |
| "rewards/accuracies": 0.22187499701976776, |
| "rewards/chosen": 0.003021240234375, |
| "rewards/margins": 1.1175870895385742e-06, |
| "rewards/rejected": 0.003021240234375, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.2986666666666667, |
| "grad_norm": 91.6921615600586, |
| "learning_rate": 4.963268819535228e-07, |
| "logits/chosen": -0.2890625, |
| "logits/rejected": -0.3125, |
| "logps/chosen": -298.0, |
| "logps/rejected": -314.0, |
| "loss": 44.3543, |
| "rewards/accuracies": 0.22812500596046448, |
| "rewards/chosen": -0.0024566650390625, |
| "rewards/margins": -0.0003814697265625, |
| "rewards/rejected": -0.0020751953125, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.3413333333333333, |
| "grad_norm": 79.45140838623047, |
| "learning_rate": 4.924581536521611e-07, |
| "logits/chosen": -0.296875, |
| "logits/rejected": -0.298828125, |
| "logps/chosen": -286.0, |
| "logps/rejected": -300.0, |
| "loss": 44.2738, |
| "rewards/accuracies": 0.25312501192092896, |
| "rewards/chosen": 0.00150299072265625, |
| "rewards/margins": 0.00183868408203125, |
| "rewards/rejected": -0.0003414154052734375, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.384, |
| "grad_norm": 78.84862518310547, |
| "learning_rate": 4.872399318152593e-07, |
| "logits/chosen": -0.26953125, |
| "logits/rejected": -0.287109375, |
| "logps/chosen": -296.0, |
| "logps/rejected": -304.0, |
| "loss": 44.3367, |
| "rewards/accuracies": 0.2515625059604645, |
| "rewards/chosen": 0.00152587890625, |
| "rewards/margins": 0.00023651123046875, |
| "rewards/rejected": 0.00128936767578125, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.4266666666666667, |
| "grad_norm": 79.79137420654297, |
| "learning_rate": 4.807012604511541e-07, |
| "logits/chosen": -0.28515625, |
| "logits/rejected": -0.28515625, |
| "logps/chosen": -290.0, |
| "logps/rejected": -312.0, |
| "loss": 44.284, |
| "rewards/accuracies": 0.24531249701976776, |
| "rewards/chosen": 0.0005950927734375, |
| "rewards/margins": 0.0023040771484375, |
| "rewards/rejected": -0.001708984375, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.4266666666666667, |
| "eval_logits/chosen": -0.298828125, |
| "eval_logits/rejected": -0.29296875, |
| "eval_logps/chosen": -284.0, |
| "eval_logps/rejected": -310.0, |
| "eval_loss": 0.6929380893707275, |
| "eval_rewards/accuracies": 0.2447139173746109, |
| "eval_rewards/chosen": -0.00069427490234375, |
| "eval_rewards/margins": -0.0003871917724609375, |
| "eval_rewards/rejected": -0.0003070831298828125, |
| "eval_runtime": 625.2651, |
| "eval_samples_per_second": 1.713, |
| "eval_steps_per_second": 0.429, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.4693333333333333, |
| "grad_norm": 84.69393157958984, |
| "learning_rate": 4.7287853303477696e-07, |
| "logits/chosen": -0.28125, |
| "logits/rejected": -0.291015625, |
| "logps/chosen": -306.0, |
| "logps/rejected": -312.0, |
| "loss": 44.3285, |
| "rewards/accuracies": 0.23906250298023224, |
| "rewards/chosen": -0.00151824951171875, |
| "rewards/margins": 0.00013446807861328125, |
| "rewards/rejected": -0.00165557861328125, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.512, |
| "grad_norm": 79.28882598876953, |
| "learning_rate": 4.638152899458579e-07, |
| "logits/chosen": -0.30859375, |
| "logits/rejected": -0.29296875, |
| "logps/chosen": -288.0, |
| "logps/rejected": -306.0, |
| "loss": 44.3582, |
| "rewards/accuracies": 0.24531249701976776, |
| "rewards/chosen": 0.000576019287109375, |
| "rewards/margins": -0.00124359130859375, |
| "rewards/rejected": 0.0018157958984375, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.5546666666666666, |
| "grad_norm": 85.38533782958984, |
| "learning_rate": 4.535619761282988e-07, |
| "logits/chosen": -0.3046875, |
| "logits/rejected": -0.294921875, |
| "logps/chosen": -290.0, |
| "logps/rejected": -304.0, |
| "loss": 44.1629, |
| "rewards/accuracies": 0.2718749940395355, |
| "rewards/chosen": 0.0018768310546875, |
| "rewards/margins": 0.004669189453125, |
| "rewards/rejected": -0.0028076171875, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.5973333333333334, |
| "grad_norm": 85.52645111083984, |
| "learning_rate": 4.42175660319555e-07, |
| "logits/chosen": -0.32421875, |
| "logits/rejected": -0.322265625, |
| "logps/chosen": -292.0, |
| "logps/rejected": -312.0, |
| "loss": 44.3742, |
| "rewards/accuracies": 0.22968749701976776, |
| "rewards/chosen": -0.000789642333984375, |
| "rewards/margins": -0.0016326904296875, |
| "rewards/rejected": 0.0008392333984375, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.64, |
| "grad_norm": 80.57353210449219, |
| "learning_rate": 4.2971971741276185e-07, |
| "logits/chosen": -0.271484375, |
| "logits/rejected": -0.287109375, |
| "logps/chosen": -286.0, |
| "logps/rejected": -292.0, |
| "loss": 44.3723, |
| "rewards/accuracies": 0.2671875059604645, |
| "rewards/chosen": -5.3882598876953125e-05, |
| "rewards/margins": -0.00075531005859375, |
| "rewards/rejected": 0.000701904296875, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.6826666666666666, |
| "grad_norm": 89.16075134277344, |
| "learning_rate": 4.162634757195417e-07, |
| "logits/chosen": -0.296875, |
| "logits/rejected": -0.27734375, |
| "logps/chosen": -292.0, |
| "logps/rejected": -304.0, |
| "loss": 44.2527, |
| "rewards/accuracies": 0.2671875059604645, |
| "rewards/chosen": -0.0032501220703125, |
| "rewards/margins": 0.003021240234375, |
| "rewards/rejected": -0.00628662109375, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.7253333333333334, |
| "grad_norm": 86.98834991455078, |
| "learning_rate": 4.018818310967842e-07, |
| "logits/chosen": -0.302734375, |
| "logits/rejected": -0.3203125, |
| "logps/chosen": -290.0, |
| "logps/rejected": -306.0, |
| "loss": 44.3082, |
| "rewards/accuracies": 0.23906250298023224, |
| "rewards/chosen": -0.0023956298828125, |
| "rewards/margins": 0.00019931793212890625, |
| "rewards/rejected": -0.002593994140625, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.768, |
| "grad_norm": 101.01860809326172, |
| "learning_rate": 3.8665483008512536e-07, |
| "logits/chosen": -0.310546875, |
| "logits/rejected": -0.31640625, |
| "logps/chosen": -292.0, |
| "logps/rejected": -320.0, |
| "loss": 44.1203, |
| "rewards/accuracies": 0.27812498807907104, |
| "rewards/chosen": 0.003509521484375, |
| "rewards/margins": 0.00689697265625, |
| "rewards/rejected": -0.003387451171875, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.8106666666666666, |
| "grad_norm": 91.42887115478516, |
| "learning_rate": 3.706672243793271e-07, |
| "logits/chosen": -0.302734375, |
| "logits/rejected": -0.291015625, |
| "logps/chosen": -292.0, |
| "logps/rejected": -314.0, |
| "loss": 44.3207, |
| "rewards/accuracies": 0.26249998807907104, |
| "rewards/chosen": -0.0030059814453125, |
| "rewards/margins": 0.001190185546875, |
| "rewards/rejected": -0.004180908203125, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.8533333333333334, |
| "grad_norm": 91.25189208984375, |
| "learning_rate": 3.540079991103235e-07, |
| "logits/chosen": -0.287109375, |
| "logits/rejected": -0.26953125, |
| "logps/chosen": -294.0, |
| "logps/rejected": -306.0, |
| "loss": 44.2477, |
| "rewards/accuracies": 0.2593750059604645, |
| "rewards/chosen": 6.341934204101562e-05, |
| "rewards/margins": 0.0026702880859375, |
| "rewards/rejected": -0.0026092529296875, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.8533333333333334, |
| "eval_logits/chosen": -0.298828125, |
| "eval_logits/rejected": -0.29296875, |
| "eval_logps/chosen": -286.0, |
| "eval_logps/rejected": -310.0, |
| "eval_loss": 0.690421462059021, |
| "eval_rewards/accuracies": 0.28358209133148193, |
| "eval_rewards/chosen": -0.003387451171875, |
| "eval_rewards/margins": 0.0054931640625, |
| "eval_rewards/rejected": -0.0089111328125, |
| "eval_runtime": 621.0126, |
| "eval_samples_per_second": 1.725, |
| "eval_steps_per_second": 0.432, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.896, |
| "grad_norm": 93.56558990478516, |
| "learning_rate": 3.367698775644589e-07, |
| "logits/chosen": -0.302734375, |
| "logits/rejected": -0.30859375, |
| "logps/chosen": -286.0, |
| "logps/rejected": -328.0, |
| "loss": 44.209, |
| "rewards/accuracies": 0.2578125, |
| "rewards/chosen": -0.0040283203125, |
| "rewards/margins": 0.003875732421875, |
| "rewards/rejected": -0.0079345703125, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.9386666666666666, |
| "grad_norm": 84.76224517822266, |
| "learning_rate": 3.1904880509659394e-07, |
| "logits/chosen": -0.30859375, |
| "logits/rejected": -0.310546875, |
| "logps/chosen": -288.0, |
| "logps/rejected": -292.0, |
| "loss": 44.2164, |
| "rewards/accuracies": 0.27656251192092896, |
| "rewards/chosen": -0.003265380859375, |
| "rewards/margins": 0.004638671875, |
| "rewards/rejected": -0.0079345703125, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.9813333333333333, |
| "grad_norm": 88.0267333984375, |
| "learning_rate": 3.0094341510955693e-07, |
| "logits/chosen": -0.30859375, |
| "logits/rejected": -0.306640625, |
| "logps/chosen": -294.0, |
| "logps/rejected": -314.0, |
| "loss": 44.2543, |
| "rewards/accuracies": 0.265625, |
| "rewards/chosen": -0.002471923828125, |
| "rewards/margins": 0.0021514892578125, |
| "rewards/rejected": -0.004608154296875, |
| "step": 230 |
| }, |
| { |
| "epoch": 1.024, |
| "grad_norm": 87.41796112060547, |
| "learning_rate": 2.825544800722376e-07, |
| "logits/chosen": -0.296875, |
| "logits/rejected": -0.310546875, |
| "logps/chosen": -288.0, |
| "logps/rejected": -296.0, |
| "loss": 44.1758, |
| "rewards/accuracies": 0.2734375, |
| "rewards/chosen": -0.0040283203125, |
| "rewards/margins": 0.0047607421875, |
| "rewards/rejected": -0.0087890625, |
| "step": 240 |
| }, |
| { |
| "epoch": 1.0666666666666667, |
| "grad_norm": 82.33610534667969, |
| "learning_rate": 2.639843506318899e-07, |
| "logits/chosen": -0.2890625, |
| "logits/rejected": -0.279296875, |
| "logps/chosen": -292.0, |
| "logps/rejected": -308.0, |
| "loss": 44.127, |
| "rewards/accuracies": 0.3062500059604645, |
| "rewards/chosen": 0.000637054443359375, |
| "rewards/margins": 0.006622314453125, |
| "rewards/rejected": -0.0059814453125, |
| "step": 250 |
| }, |
| { |
| "epoch": 1.1093333333333333, |
| "grad_norm": 92.11223602294922, |
| "learning_rate": 2.453363859424809e-07, |
| "logits/chosen": -0.29296875, |
| "logits/rejected": -0.28515625, |
| "logps/chosen": -298.0, |
| "logps/rejected": -308.0, |
| "loss": 44.2828, |
| "rewards/accuracies": 0.2750000059604645, |
| "rewards/chosen": -0.00147247314453125, |
| "rewards/margins": 0.002899169921875, |
| "rewards/rejected": -0.004364013671875, |
| "step": 260 |
| }, |
| { |
| "epoch": 1.152, |
| "grad_norm": 87.36744689941406, |
| "learning_rate": 2.267143783798094e-07, |
| "logits/chosen": -0.29296875, |
| "logits/rejected": -0.29296875, |
| "logps/chosen": -286.0, |
| "logps/rejected": -318.0, |
| "loss": 44.1617, |
| "rewards/accuracies": 0.28437501192092896, |
| "rewards/chosen": -0.0019683837890625, |
| "rewards/margins": 0.00701904296875, |
| "rewards/rejected": -0.00897216796875, |
| "step": 270 |
| }, |
| { |
| "epoch": 1.1946666666666665, |
| "grad_norm": 83.57527923583984, |
| "learning_rate": 2.0822197584536287e-07, |
| "logits/chosen": -0.291015625, |
| "logits/rejected": -0.275390625, |
| "logps/chosen": -292.0, |
| "logps/rejected": -296.0, |
| "loss": 44.3105, |
| "rewards/accuracies": 0.2890625, |
| "rewards/chosen": -0.0037384033203125, |
| "rewards/margins": 0.0023040771484375, |
| "rewards/rejected": -0.00604248046875, |
| "step": 280 |
| }, |
| { |
| "epoch": 1.2373333333333334, |
| "grad_norm": 88.8481216430664, |
| "learning_rate": 1.899621048743019e-07, |
| "logits/chosen": -0.298828125, |
| "logits/rejected": -0.298828125, |
| "logps/chosen": -290.0, |
| "logps/rejected": -312.0, |
| "loss": 44.1246, |
| "rewards/accuracies": 0.31718748807907104, |
| "rewards/chosen": -0.003448486328125, |
| "rewards/margins": 0.00738525390625, |
| "rewards/rejected": -0.0108642578125, |
| "step": 290 |
| }, |
| { |
| "epoch": 1.28, |
| "grad_norm": 89.92357635498047, |
| "learning_rate": 1.7203639775848423e-07, |
| "logits/chosen": -0.294921875, |
| "logits/rejected": -0.29296875, |
| "logps/chosen": -290.0, |
| "logps/rejected": -308.0, |
| "loss": 44.1766, |
| "rewards/accuracies": 0.2984375059604645, |
| "rewards/chosen": -0.00113677978515625, |
| "rewards/margins": 0.0068359375, |
| "rewards/rejected": -0.00799560546875, |
| "step": 300 |
| }, |
| { |
| "epoch": 1.28, |
| "eval_logits/chosen": -0.298828125, |
| "eval_logits/rejected": -0.29296875, |
| "eval_logps/chosen": -286.0, |
| "eval_logps/rejected": -310.0, |
| "eval_loss": 0.6902318000793457, |
| "eval_rewards/accuracies": 0.3224502503871918, |
| "eval_rewards/chosen": -0.0027618408203125, |
| "eval_rewards/margins": 0.006195068359375, |
| "eval_rewards/rejected": -0.00897216796875, |
| "eval_runtime": 616.3156, |
| "eval_samples_per_second": 1.738, |
| "eval_steps_per_second": 0.435, |
| "step": 300 |
| }, |
| { |
| "epoch": 1.3226666666666667, |
| "grad_norm": 90.47930145263672, |
| "learning_rate": 1.5454462687309444e-07, |
| "logits/chosen": -0.2890625, |
| "logits/rejected": -0.302734375, |
| "logps/chosen": -292.0, |
| "logps/rejected": -300.0, |
| "loss": 44.0316, |
| "rewards/accuracies": 0.3453125059604645, |
| "rewards/chosen": -0.0012054443359375, |
| "rewards/margins": 0.01055908203125, |
| "rewards/rejected": -0.01177978515625, |
| "step": 310 |
| }, |
| { |
| "epoch": 1.3653333333333333, |
| "grad_norm": 94.8127670288086, |
| "learning_rate": 1.3758414935535145e-07, |
| "logits/chosen": -0.287109375, |
| "logits/rejected": -0.296875, |
| "logps/chosen": -286.0, |
| "logps/rejected": -302.0, |
| "loss": 44.0305, |
| "rewards/accuracies": 0.3343749940395355, |
| "rewards/chosen": 0.000606536865234375, |
| "rewards/margins": 0.010498046875, |
| "rewards/rejected": -0.0098876953125, |
| "step": 320 |
| }, |
| { |
| "epoch": 1.408, |
| "grad_norm": 89.58834838867188, |
| "learning_rate": 1.212493652261462e-07, |
| "logits/chosen": -0.298828125, |
| "logits/rejected": -0.275390625, |
| "logps/chosen": -290.0, |
| "logps/rejected": -296.0, |
| "loss": 44.1727, |
| "rewards/accuracies": 0.30937498807907104, |
| "rewards/chosen": -0.00131988525390625, |
| "rewards/margins": 0.00592041015625, |
| "rewards/rejected": -0.00726318359375, |
| "step": 330 |
| }, |
| { |
| "epoch": 1.4506666666666668, |
| "grad_norm": 88.79105377197266, |
| "learning_rate": 1.0563119197063933e-07, |
| "logits/chosen": -0.28515625, |
| "logits/rejected": -0.30078125, |
| "logps/chosen": -290.0, |
| "logps/rejected": -308.0, |
| "loss": 44.1437, |
| "rewards/accuracies": 0.3031249940395355, |
| "rewards/chosen": -0.003875732421875, |
| "rewards/margins": 0.00640869140625, |
| "rewards/rejected": -0.01025390625, |
| "step": 340 |
| }, |
| { |
| "epoch": 1.4933333333333334, |
| "grad_norm": 84.3297348022461, |
| "learning_rate": 9.081655850224449e-08, |
| "logits/chosen": -0.291015625, |
| "logits/rejected": -0.298828125, |
| "logps/chosen": -290.0, |
| "logps/rejected": -300.0, |
| "loss": 44.4195, |
| "rewards/accuracies": 0.26875001192092896, |
| "rewards/chosen": -0.00689697265625, |
| "rewards/margins": -0.0015411376953125, |
| "rewards/rejected": -0.00537109375, |
| "step": 350 |
| }, |
| { |
| "epoch": 1.536, |
| "grad_norm": 82.21558380126953, |
| "learning_rate": 7.68879213265311e-08, |
| "logits/chosen": -0.298828125, |
| "logits/rejected": -0.283203125, |
| "logps/chosen": -284.0, |
| "logps/rejected": -308.0, |
| "loss": 44.0789, |
| "rewards/accuracies": 0.33125001192092896, |
| "rewards/chosen": -0.0019989013671875, |
| "rewards/margins": 0.00897216796875, |
| "rewards/rejected": -0.010986328125, |
| "step": 360 |
| }, |
| { |
| "epoch": 1.5786666666666667, |
| "grad_norm": 91.78382110595703, |
| "learning_rate": 6.392280559802341e-08, |
| "logits/chosen": -0.302734375, |
| "logits/rejected": -0.30078125, |
| "logps/chosen": -288.0, |
| "logps/rejected": -316.0, |
| "loss": 44.1172, |
| "rewards/accuracies": 0.2953124940395355, |
| "rewards/chosen": -0.0025634765625, |
| "rewards/margins": 0.007415771484375, |
| "rewards/rejected": -0.00994873046875, |
| "step": 370 |
| }, |
| { |
| "epoch": 1.6213333333333333, |
| "grad_norm": 82.35102844238281, |
| "learning_rate": 5.199337362431791e-08, |
| "logits/chosen": -0.298828125, |
| "logits/rejected": -0.3046875, |
| "logps/chosen": -292.0, |
| "logps/rejected": -296.0, |
| "loss": 44.1281, |
| "rewards/accuracies": 0.3031249940395355, |
| "rewards/chosen": -0.002838134765625, |
| "rewards/margins": 0.006866455078125, |
| "rewards/rejected": -0.00970458984375, |
| "step": 380 |
| }, |
| { |
| "epoch": 1.6640000000000001, |
| "grad_norm": 80.81761932373047, |
| "learning_rate": 4.116602321917617e-08, |
| "logits/chosen": -0.283203125, |
| "logits/rejected": -0.283203125, |
| "logps/chosen": -274.0, |
| "logps/rejected": -290.0, |
| "loss": 44.1797, |
| "rewards/accuracies": 0.3265624940395355, |
| "rewards/chosen": -0.0029296875, |
| "rewards/margins": 0.006195068359375, |
| "rewards/rejected": -0.0091552734375, |
| "step": 390 |
| }, |
| { |
| "epoch": 1.7066666666666666, |
| "grad_norm": 87.5019302368164, |
| "learning_rate": 3.150101814011136e-08, |
| "logits/chosen": -0.302734375, |
| "logits/rejected": -0.2890625, |
| "logps/chosen": -298.0, |
| "logps/rejected": -312.0, |
| "loss": 44.0922, |
| "rewards/accuracies": 0.3125, |
| "rewards/chosen": -0.0045166015625, |
| "rewards/margins": 0.007110595703125, |
| "rewards/rejected": -0.01165771484375, |
| "step": 400 |
| }, |
| { |
| "epoch": 1.7066666666666666, |
| "eval_logits/chosen": -0.298828125, |
| "eval_logits/rejected": -0.29296875, |
| "eval_logps/chosen": -286.0, |
| "eval_logps/rejected": -310.0, |
| "eval_loss": 0.6897431015968323, |
| "eval_rewards/accuracies": 0.323383092880249, |
| "eval_rewards/chosen": -0.006591796875, |
| "eval_rewards/margins": 0.007232666015625, |
| "eval_rewards/rejected": -0.0137939453125, |
| "eval_runtime": 599.0571, |
| "eval_samples_per_second": 1.788, |
| "eval_steps_per_second": 0.447, |
| "step": 400 |
| }, |
| { |
| "epoch": 1.7493333333333334, |
| "grad_norm": 87.16879272460938, |
| "learning_rate": 2.3052152667409287e-08, |
| "logits/chosen": -0.287109375, |
| "logits/rejected": -0.298828125, |
| "logps/chosen": -306.0, |
| "logps/rejected": -318.0, |
| "loss": 44.3031, |
| "rewards/accuracies": 0.3125, |
| "rewards/chosen": -0.00921630859375, |
| "rewards/margins": 0.00311279296875, |
| "rewards/rejected": -0.0123291015625, |
| "step": 410 |
| }, |
| { |
| "epoch": 1.792, |
| "grad_norm": 83.92235565185547, |
| "learning_rate": 1.5866452191498486e-08, |
| "logits/chosen": -0.314453125, |
| "logits/rejected": -0.310546875, |
| "logps/chosen": -290.0, |
| "logps/rejected": -304.0, |
| "loss": 44.1168, |
| "rewards/accuracies": 0.31718748807907104, |
| "rewards/chosen": -0.0040283203125, |
| "rewards/margins": 0.0078125, |
| "rewards/rejected": -0.0118408203125, |
| "step": 420 |
| }, |
| { |
| "epoch": 1.8346666666666667, |
| "grad_norm": 78.69657897949219, |
| "learning_rate": 9.983911475163725e-09, |
| "logits/chosen": -0.294921875, |
| "logits/rejected": -0.30859375, |
| "logps/chosen": -292.0, |
| "logps/rejected": -310.0, |
| "loss": 43.9914, |
| "rewards/accuracies": 0.3031249940395355, |
| "rewards/chosen": 0.00072479248046875, |
| "rewards/margins": 0.01123046875, |
| "rewards/rejected": -0.010498046875, |
| "step": 430 |
| }, |
| { |
| "epoch": 1.8773333333333333, |
| "grad_norm": 96.32901763916016, |
| "learning_rate": 5.437272047405711e-09, |
| "logits/chosen": -0.294921875, |
| "logits/rejected": -0.294921875, |
| "logps/chosen": -304.0, |
| "logps/rejected": -310.0, |
| "loss": 44.2094, |
| "rewards/accuracies": 0.296875, |
| "rewards/chosen": -0.00567626953125, |
| "rewards/margins": 0.00445556640625, |
| "rewards/rejected": -0.0101318359375, |
| "step": 440 |
| }, |
| { |
| "epoch": 1.92, |
| "grad_norm": 85.1646728515625, |
| "learning_rate": 2.251839967945535e-09, |
| "logits/chosen": -0.298828125, |
| "logits/rejected": -0.3046875, |
| "logps/chosen": -300.0, |
| "logps/rejected": -308.0, |
| "loss": 44.1816, |
| "rewards/accuracies": 0.30937498807907104, |
| "rewards/chosen": -0.006622314453125, |
| "rewards/margins": 0.006134033203125, |
| "rewards/rejected": -0.01275634765625, |
| "step": 450 |
| }, |
| { |
| "epoch": 1.9626666666666668, |
| "grad_norm": 89.11353302001953, |
| "learning_rate": 4.453449766758932e-10, |
| "logits/chosen": -0.291015625, |
| "logits/rejected": -0.294921875, |
| "logps/chosen": -296.0, |
| "logps/rejected": -314.0, |
| "loss": 44.3098, |
| "rewards/accuracies": 0.27031248807907104, |
| "rewards/chosen": -0.00543212890625, |
| "rewards/margins": 0.00145721435546875, |
| "rewards/rejected": -0.006866455078125, |
| "step": 460 |
| }, |
| { |
| "epoch": 1.9968, |
| "step": 468, |
| "total_flos": 0.0, |
| "train_loss": 44.232705662393165, |
| "train_runtime": 24109.1423, |
| "train_samples_per_second": 1.244, |
| "train_steps_per_second": 0.019 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 468, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 2, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": false, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|