{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9959925193694897,
  "eval_steps": 400,
  "global_step": 233,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.02137323002938819,
      "grad_norm": 7.331315052451733,
      "learning_rate": 2.0833333333333333e-07,
      "logits/chosen": -0.94921875,
      "logits/rejected": -0.90625,
      "logps/chosen": -0.28515625,
      "logps/rejected": -0.2890625,
      "loss": 1.5981,
      "rewards/accuracies": 0.5062500238418579,
      "rewards/chosen": -0.71484375,
      "rewards/margins": 0.0072021484375,
      "rewards/rejected": -0.72265625,
      "step": 5
    },
    {
      "epoch": 0.04274646005877638,
      "grad_norm": 16.131056100838176,
      "learning_rate": 4.1666666666666667e-07,
      "logits/chosen": -0.8984375,
      "logits/rejected": -0.921875,
      "logps/chosen": -0.3046875,
      "logps/rejected": -0.330078125,
      "loss": 1.5742,
      "rewards/accuracies": 0.5874999761581421,
      "rewards/chosen": -0.76171875,
      "rewards/margins": 0.06494140625,
      "rewards/rejected": -0.828125,
      "step": 10
    },
    {
      "epoch": 0.06411969008816458,
      "grad_norm": 9.687597283790334,
      "learning_rate": 6.249999999999999e-07,
      "logits/chosen": -1.0,
      "logits/rejected": -0.9296875,
      "logps/chosen": -0.296875,
      "logps/rejected": -0.31640625,
      "loss": 1.5803,
      "rewards/accuracies": 0.5062500238418579,
      "rewards/chosen": -0.7421875,
      "rewards/margins": 0.048828125,
      "rewards/rejected": -0.7890625,
      "step": 15
    },
    {
      "epoch": 0.08549292011755276,
      "grad_norm": 7.802540958166764,
      "learning_rate": 8.333333333333333e-07,
      "logits/chosen": -0.99609375,
      "logits/rejected": -0.9375,
      "logps/chosen": -0.283203125,
      "logps/rejected": -0.302734375,
      "loss": 1.5668,
      "rewards/accuracies": 0.512499988079071,
      "rewards/chosen": -0.7109375,
      "rewards/margins": 0.04443359375,
      "rewards/rejected": -0.75390625,
      "step": 20
    },
    {
      "epoch": 0.10686615014694095,
      "grad_norm": 11.510345597750495,
      "learning_rate": 9.999435142363483e-07,
      "logits/chosen": -0.95703125,
      "logits/rejected": -0.91796875,
      "logps/chosen": -0.306640625,
      "logps/rejected": -0.326171875,
      "loss": 1.5727,
      "rewards/accuracies": 0.46875,
      "rewards/chosen": -0.765625,
      "rewards/margins": 0.048583984375,
      "rewards/rejected": -0.81640625,
      "step": 25
    },
    {
      "epoch": 0.12823938017632916,
      "grad_norm": 13.270905394176456,
      "learning_rate": 9.97967852255038e-07,
      "logits/chosen": -0.98828125,
      "logits/rejected": -0.97265625,
      "logps/chosen": -0.3046875,
      "logps/rejected": -0.34765625,
      "loss": 1.5622,
      "rewards/accuracies": 0.550000011920929,
      "rewards/chosen": -0.76171875,
      "rewards/margins": 0.10791015625,
      "rewards/rejected": -0.87109375,
      "step": 30
    },
    {
      "epoch": 0.14961261020571734,
      "grad_norm": 7.551741305333013,
      "learning_rate": 9.931806517013612e-07,
      "logits/chosen": -0.93359375,
      "logits/rejected": -0.9140625,
      "logps/chosen": -0.3671875,
      "logps/rejected": -0.447265625,
      "loss": 1.5496,
      "rewards/accuracies": 0.612500011920929,
      "rewards/chosen": -0.91796875,
      "rewards/margins": 0.2001953125,
      "rewards/rejected": -1.1171875,
      "step": 35
    },
    {
      "epoch": 0.17098584023510552,
      "grad_norm": 9.836362551399633,
      "learning_rate": 9.856089412257604e-07,
      "logits/chosen": -0.9453125,
      "logits/rejected": -0.91796875,
      "logps/chosen": -0.330078125,
      "logps/rejected": -0.37109375,
      "loss": 1.5409,
      "rewards/accuracies": 0.48750001192092896,
      "rewards/chosen": -0.82421875,
      "rewards/margins": 0.1044921875,
      "rewards/rejected": -0.9296875,
      "step": 40
    },
    {
      "epoch": 0.19235907026449373,
      "grad_norm": 8.767581425134471,
      "learning_rate": 9.752954708892377e-07,
      "logits/chosen": -1.015625,
      "logits/rejected": -0.9609375,
      "logps/chosen": -0.38671875,
      "logps/rejected": -0.42578125,
      "loss": 1.5454,
      "rewards/accuracies": 0.5375000238418579,
      "rewards/chosen": -0.96875,
      "rewards/margins": 0.09423828125,
      "rewards/rejected": -1.0625,
      "step": 45
    },
    {
      "epoch": 0.2137323002938819,
      "grad_norm": 10.8544145228066,
      "learning_rate": 9.62298470795473e-07,
      "logits/chosen": -1.0078125,
      "logits/rejected": -0.984375,
      "logps/chosen": -0.3984375,
      "logps/rejected": -0.421875,
      "loss": 1.5737,
      "rewards/accuracies": 0.550000011920929,
      "rewards/chosen": -0.9921875,
      "rewards/margins": 0.061767578125,
      "rewards/rejected": -1.0546875,
      "step": 50
    },
    {
      "epoch": 0.2351055303232701,
      "grad_norm": 10.212862305253141,
      "learning_rate": 9.466913223222465e-07,
      "logits/chosen": -0.97265625,
      "logits/rejected": -0.92578125,
      "logps/chosen": -0.35546875,
      "logps/rejected": -0.4609375,
      "loss": 1.5398,
      "rewards/accuracies": 0.5375000238418579,
      "rewards/chosen": -0.890625,
      "rewards/margins": 0.263671875,
      "rewards/rejected": -1.1484375,
      "step": 55
    },
    {
      "epoch": 0.2564787603526583,
      "grad_norm": 12.631724574842627,
      "learning_rate": 9.285621438083997e-07,
      "logits/chosen": -1.015625,
      "logits/rejected": -0.92578125,
      "logps/chosen": -0.38671875,
      "logps/rejected": -0.48828125,
      "loss": 1.504,
      "rewards/accuracies": 0.6000000238418579,
      "rewards/chosen": -0.96484375,
      "rewards/margins": 0.255859375,
      "rewards/rejected": -1.21875,
      "step": 60
    },
    {
      "epoch": 0.2778519903820465,
      "grad_norm": 10.0800410336527,
      "learning_rate": 9.080132930355566e-07,
      "logits/chosen": -1.046875,
      "logits/rejected": -1.0234375,
      "logps/chosen": -0.423828125,
      "logps/rejected": -0.5546875,
      "loss": 1.4833,
      "rewards/accuracies": 0.612500011920929,
      "rewards/chosen": -1.0625,
      "rewards/margins": 0.326171875,
      "rewards/rejected": -1.3828125,
      "step": 65
    },
    {
      "epoch": 0.2992252204114347,
      "grad_norm": 11.90187451004937,
      "learning_rate": 8.851607893136064e-07,
      "logits/chosen": -1.0625,
      "logits/rejected": -1.0,
      "logps/chosen": -0.47265625,
      "logps/rejected": -0.5390625,
      "loss": 1.5116,
      "rewards/accuracies": 0.625,
      "rewards/chosen": -1.1796875,
      "rewards/margins": 0.1669921875,
      "rewards/rejected": -1.3515625,
      "step": 70
    },
    {
      "epoch": 0.32059845044082286,
      "grad_norm": 10.51424506148012,
      "learning_rate": 8.601336584328658e-07,
      "logits/chosen": -0.9296875,
      "logits/rejected": -0.91015625,
      "logps/chosen": -0.46484375,
      "logps/rejected": -0.6015625,
      "loss": 1.4666,
      "rewards/accuracies": 0.5874999761581421,
      "rewards/chosen": -1.1640625,
      "rewards/margins": 0.34375,
      "rewards/rejected": -1.5078125,
      "step": 75
    },
    {
      "epoch": 0.34197168047021104,
      "grad_norm": 11.719501574810968,
      "learning_rate": 8.330732041813366e-07,
      "logits/chosen": -0.984375,
      "logits/rejected": -0.96484375,
      "logps/chosen": -0.486328125,
      "logps/rejected": -0.625,
      "loss": 1.4657,
      "rewards/accuracies": 0.637499988079071,
      "rewards/chosen": -1.2109375,
      "rewards/margins": 0.345703125,
      "rewards/rejected": -1.5625,
      "step": 80
    },
    {
      "epoch": 0.36334491049959927,
      "grad_norm": 15.388364264984391,
      "learning_rate": 8.041322105400921e-07,
      "logits/chosen": -0.9765625,
      "logits/rejected": -0.921875,
      "logps/chosen": -0.55078125,
      "logps/rejected": -0.77734375,
      "loss": 1.4418,
      "rewards/accuracies": 0.737500011920929,
      "rewards/chosen": -1.3828125,
      "rewards/margins": 0.5625,
      "rewards/rejected": -1.9453125,
      "step": 85
    },
    {
      "epoch": 0.38471814052898745,
      "grad_norm": 16.233063949950242,
      "learning_rate": 7.734740790612136e-07,
      "logits/chosen": -0.953125,
      "logits/rejected": -0.90625,
      "logps/chosen": -0.6484375,
      "logps/rejected": -0.79296875,
      "loss": 1.395,
      "rewards/accuracies": 0.6875,
      "rewards/chosen": -1.625,
      "rewards/margins": 0.359375,
      "rewards/rejected": -1.984375,
      "step": 90
    },
    {
      "epoch": 0.40609137055837563,
      "grad_norm": 11.872631923458862,
      "learning_rate": 7.412719062986631e-07,
      "logits/chosen": -1.0078125,
      "logits/rejected": -0.96875,
      "logps/chosen": -0.68359375,
      "logps/rejected": -0.90234375,
      "loss": 1.3857,
      "rewards/accuracies": 0.731249988079071,
      "rewards/chosen": -1.7109375,
      "rewards/margins": 0.54296875,
      "rewards/rejected": -2.25,
      "step": 95
    },
    {
      "epoch": 0.4274646005877638,
      "grad_norm": 22.45134823888932,
      "learning_rate": 7.077075065009433e-07,
      "logits/chosen": -0.86328125,
      "logits/rejected": -0.86328125,
      "logps/chosen": -0.75,
      "logps/rejected": -0.984375,
      "loss": 1.3902,
      "rewards/accuracies": 0.7250000238418579,
      "rewards/chosen": -1.875,
      "rewards/margins": 0.5859375,
      "rewards/rejected": -2.453125,
      "step": 100
    },
    {
      "epoch": 0.448837830617152,
      "grad_norm": 18.006604817685755,
      "learning_rate": 6.72970385083438e-07,
      "logits/chosen": -0.9296875,
      "logits/rejected": -0.890625,
      "logps/chosen": -0.8046875,
      "logps/rejected": -0.99609375,
      "loss": 1.3847,
      "rewards/accuracies": 0.7250000238418579,
      "rewards/chosen": -2.015625,
      "rewards/margins": 0.4765625,
      "rewards/rejected": -2.484375,
      "step": 105
    },
    {
      "epoch": 0.4702110606465402,
      "grad_norm": 16.753267675250456,
      "learning_rate": 6.372566686762426e-07,
      "logits/chosen": -0.8515625,
      "logits/rejected": -0.81640625,
      "logps/chosen": -0.9375,
      "logps/rejected": -1.1953125,
      "loss": 1.2934,
      "rewards/accuracies": 0.71875,
      "rewards/chosen": -2.34375,
      "rewards/margins": 0.64453125,
      "rewards/rejected": -2.984375,
      "step": 110
    },
    {
      "epoch": 0.4915842906759284,
      "grad_norm": 18.88632199685095,
      "learning_rate": 6.00767997788451e-07,
      "logits/chosen": -0.91015625,
      "logits/rejected": -0.875,
      "logps/chosen": -1.09375,
      "logps/rejected": -1.3359375,
      "loss": 1.2884,
      "rewards/accuracies": 0.6875,
      "rewards/chosen": -2.734375,
      "rewards/margins": 0.59765625,
      "rewards/rejected": -3.34375,
      "step": 115
    },
    {
      "epoch": 0.5129575207053166,
      "grad_norm": 20.87844659691919,
      "learning_rate": 5.637103883409525e-07,
      "logits/chosen": -0.89453125,
      "logits/rejected": -0.90625,
      "logps/chosen": -1.265625,
      "logps/rejected": -1.609375,
      "loss": 1.2416,
      "rewards/accuracies": 0.7437499761581421,
      "rewards/chosen": -3.15625,
      "rewards/margins": 0.8671875,
      "rewards/rejected": -4.03125,
      "step": 120
    },
    {
      "epoch": 0.5343307507347048,
      "grad_norm": 19.2002205664862,
      "learning_rate": 5.262930684955438e-07,
      "logits/chosen": -0.890625,
      "logits/rejected": -0.84375,
      "logps/chosen": -1.3046875,
      "logps/rejected": -1.7578125,
      "loss": 1.179,
      "rewards/accuracies": 0.8187500238418579,
      "rewards/chosen": -3.25,
      "rewards/margins": 1.1328125,
      "rewards/rejected": -4.40625,
      "step": 125
    },
    {
      "epoch": 0.555703980764093,
      "grad_norm": 21.938475092817775,
      "learning_rate": 4.88727297347654e-07,
      "logits/chosen": -0.8203125,
      "logits/rejected": -0.7890625,
      "logps/chosen": -1.484375,
      "logps/rejected": -1.9921875,
      "loss": 1.2199,
      "rewards/accuracies": 0.7437499761581421,
      "rewards/chosen": -3.703125,
      "rewards/margins": 1.2734375,
      "rewards/rejected": -4.96875,
      "step": 130
    },
    {
      "epoch": 0.5770772107934812,
      "grad_norm": 35.15980683434337,
      "learning_rate": 4.512251721523659e-07,
      "logits/chosen": -0.80078125,
      "logits/rejected": -0.7734375,
      "logps/chosen": -1.703125,
      "logps/rejected": -2.140625,
      "loss": 1.1432,
      "rewards/accuracies": 0.75,
      "rewards/chosen": -4.25,
      "rewards/margins": 1.1015625,
      "rewards/rejected": -5.375,
      "step": 135
    },
    {
      "epoch": 0.5984504408228694,
      "grad_norm": 31.78956774132237,
      "learning_rate": 4.139984308181708e-07,
      "logits/chosen": -0.68359375,
      "logits/rejected": -0.6328125,
      "logps/chosen": -1.765625,
      "logps/rejected": -2.140625,
      "loss": 1.1473,
      "rewards/accuracies": 0.6937500238418579,
      "rewards/chosen": -4.40625,
      "rewards/margins": 0.94140625,
      "rewards/rejected": -5.34375,
      "step": 140
    },
    {
      "epoch": 0.6198236708522575,
      "grad_norm": 24.493239330333395,
      "learning_rate": 3.772572564296004e-07,
      "logits/chosen": -0.671875,
      "logits/rejected": -0.6015625,
      "logps/chosen": -2.09375,
      "logps/rejected": -2.578125,
      "loss": 1.113,
      "rewards/accuracies": 0.7250000238418579,
      "rewards/chosen": -5.25,
      "rewards/margins": 1.1953125,
      "rewards/rejected": -6.4375,
      "step": 145
    },
    {
      "epoch": 0.6411969008816457,
      "grad_norm": 41.1151110363947,
      "learning_rate": 3.412090905484337e-07,
      "logits/chosen": -0.5234375,
      "logits/rejected": -0.48046875,
      "logps/chosen": -2.234375,
      "logps/rejected": -2.65625,
      "loss": 1.1182,
      "rewards/accuracies": 0.71875,
      "rewards/chosen": -5.59375,
      "rewards/margins": 1.0625,
      "rewards/rejected": -6.65625,
      "step": 150
    },
    {
      "epoch": 0.6625701309110339,
      "grad_norm": 34.96661272078944,
      "learning_rate": 3.060574619936075e-07,
      "logits/chosen": -0.57421875,
      "logits/rejected": -0.53125,
      "logps/chosen": -2.1875,
      "logps/rejected": -2.578125,
      "loss": 1.1116,
      "rewards/accuracies": 0.71875,
      "rewards/chosen": -5.46875,
      "rewards/margins": 0.984375,
      "rewards/rejected": -6.46875,
      "step": 155
    },
    {
      "epoch": 0.6839433609404221,
      "grad_norm": 30.64693471267594,
      "learning_rate": 2.720008377125682e-07,
      "logits/chosen": -0.65234375,
      "logits/rejected": -0.6171875,
      "logps/chosen": -2.3125,
      "logps/rejected": -2.859375,
      "loss": 1.0848,
      "rewards/accuracies": 0.831250011920929,
      "rewards/chosen": -5.78125,
      "rewards/margins": 1.375,
      "rewards/rejected": -7.15625,
      "step": 160
    },
    {
      "epoch": 0.7053165909698104,
      "grad_norm": 30.547513130259837,
      "learning_rate": 2.3923150223207173e-07,
      "logits/chosen": -0.478515625,
      "logits/rejected": -0.427734375,
      "logps/chosen": -2.390625,
      "logps/rejected": -2.984375,
      "loss": 1.0943,
      "rewards/accuracies": 0.7875000238418579,
      "rewards/chosen": -5.96875,
      "rewards/margins": 1.484375,
      "rewards/rejected": -7.46875,
      "step": 165
    },
    {
      "epoch": 0.7266898209991985,
      "grad_norm": 34.11282517597963,
      "learning_rate": 2.0793447201508286e-07,
      "logits/chosen": -0.57421875,
      "logits/rejected": -0.51953125,
      "logps/chosen": -2.359375,
      "logps/rejected": -3.125,
      "loss": 1.0601,
      "rewards/accuracies": 0.8062499761581421,
      "rewards/chosen": -5.90625,
      "rewards/margins": 1.921875,
      "rewards/rejected": -7.8125,
      "step": 170
    },
    {
      "epoch": 0.7480630510285867,
      "grad_norm": 40.72462060392372,
      "learning_rate": 1.7828645085333644e-07,
      "logits/chosen": -0.55078125,
      "logits/rejected": -0.5625,
      "logps/chosen": -2.59375,
      "logps/rejected": -3.21875,
      "loss": 1.0617,
      "rewards/accuracies": 0.7749999761581421,
      "rewards/chosen": -6.46875,
      "rewards/margins": 1.5546875,
      "rewards/rejected": -8.0625,
      "step": 175
    },
    {
      "epoch": 0.7694362810579749,
      "grad_norm": 35.91258416715483,
      "learning_rate": 1.5045483219344385e-07,
      "logits/chosen": -0.5859375,
      "logits/rejected": -0.62109375,
      "logps/chosen": -2.515625,
      "logps/rejected": -3.234375,
      "loss": 1.0314,
      "rewards/accuracies": 0.7875000238418579,
      "rewards/chosen": -6.28125,
      "rewards/margins": 1.7890625,
      "rewards/rejected": -8.0625,
      "step": 180
    },
    {
      "epoch": 0.7908095110873631,
      "grad_norm": 40.36681084696311,
      "learning_rate": 1.2459675402943288e-07,
      "logits/chosen": -0.57421875,
      "logits/rejected": -0.5078125,
      "logps/chosen": -2.65625,
      "logps/rejected": -3.171875,
      "loss": 1.0645,
      "rewards/accuracies": 0.7124999761581421,
      "rewards/chosen": -6.65625,
      "rewards/margins": 1.296875,
      "rewards/rejected": -7.9375,
      "step": 185
    },
    {
      "epoch": 0.8121827411167513,
      "grad_norm": 34.60643699666399,
      "learning_rate": 1.0085821169782199e-07,
      "logits/chosen": -0.55078125,
      "logits/rejected": -0.55078125,
      "logps/chosen": -2.6875,
      "logps/rejected": -3.34375,
      "loss": 1.0308,
      "rewards/accuracies": 0.7749999761581421,
      "rewards/chosen": -6.75,
      "rewards/margins": 1.640625,
      "rewards/rejected": -8.375,
      "step": 190
    },
    {
      "epoch": 0.8335559711461394,
      "grad_norm": 38.94834120560286,
      "learning_rate": 7.937323358440934e-08,
      "logits/chosen": -0.515625,
      "logits/rejected": -0.53125,
      "logps/chosen": -2.59375,
      "logps/rejected": -3.359375,
      "loss": 1.018,
      "rewards/accuracies": 0.824999988079071,
      "rewards/chosen": -6.46875,
      "rewards/margins": 1.9296875,
      "rewards/rejected": -8.375,
      "step": 195
    },
    {
      "epoch": 0.8549292011755276,
      "grad_norm": 33.86005508761931,
      "learning_rate": 6.026312439675551e-08,
      "logits/chosen": -0.5625,
      "logits/rejected": -0.5234375,
      "logps/chosen": -2.671875,
      "logps/rejected": -3.3125,
      "loss": 1.031,
      "rewards/accuracies": 0.793749988079071,
      "rewards/chosen": -6.65625,
      "rewards/margins": 1.59375,
      "rewards/rejected": -8.25,
      "step": 200
    },
    {
      "epoch": 0.8763024312049158,
      "grad_norm": 35.363713916853904,
      "learning_rate": 4.3635780274861864e-08,
      "logits/chosen": -0.4765625,
      "logits/rejected": -0.4765625,
      "logps/chosen": -2.734375,
      "logps/rejected": -3.328125,
      "loss": 1.0528,
      "rewards/accuracies": 0.75,
      "rewards/chosen": -6.84375,
      "rewards/margins": 1.484375,
      "rewards/rejected": -8.3125,
      "step": 205
    },
    {
      "epoch": 0.897675661234304,
      "grad_norm": 35.71370500438308,
      "learning_rate": 2.958507960694784e-08,
      "logits/chosen": -0.50390625,
      "logits/rejected": -0.470703125,
      "logps/chosen": -2.484375,
      "logps/rejected": -3.296875,
      "loss": 1.0286,
      "rewards/accuracies": 0.824999988079071,
      "rewards/chosen": -6.1875,
      "rewards/margins": 2.03125,
      "rewards/rejected": -8.25,
      "step": 210
    },
    {
      "epoch": 0.9190488912636923,
      "grad_norm": 33.989939318779676,
      "learning_rate": 1.8190352989793322e-08,
      "logits/chosen": -0.5390625,
      "logits/rejected": -0.52734375,
      "logps/chosen": -2.6875,
      "logps/rejected": -3.421875,
      "loss": 1.0714,
      "rewards/accuracies": 0.831250011920929,
      "rewards/chosen": -6.71875,
      "rewards/margins": 1.859375,
      "rewards/rejected": -8.5625,
      "step": 215
    },
    {
      "epoch": 0.9404221212930804,
      "grad_norm": 30.604992224663434,
      "learning_rate": 9.515935326265378e-09,
      "logits/chosen": -0.51953125,
      "logits/rejected": -0.462890625,
      "logps/chosen": -2.625,
      "logps/rejected": -3.203125,
      "loss": 1.0149,
      "rewards/accuracies": 0.78125,
      "rewards/chosen": -6.59375,
      "rewards/margins": 1.421875,
      "rewards/rejected": -8.0,
      "step": 220
    },
    {
      "epoch": 0.9617953513224686,
      "grad_norm": 32.491153001452076,
      "learning_rate": 3.6108025888958447e-09,
      "logits/chosen": -0.54296875,
      "logits/rejected": -0.515625,
      "logps/chosen": -2.625,
      "logps/rejected": -3.296875,
      "loss": 1.0096,
      "rewards/accuracies": 0.7437499761581421,
      "rewards/chosen": -6.5625,
      "rewards/margins": 1.6796875,
      "rewards/rejected": -8.25,
      "step": 225
    },
    {
      "epoch": 0.9831685813518568,
      "grad_norm": 33.176623047011205,
      "learning_rate": 5.082953003528456e-10,
      "logits/chosen": -0.5546875,
      "logits/rejected": -0.54296875,
      "logps/chosen": -2.625,
      "logps/rejected": -3.3125,
      "loss": 0.9971,
      "rewards/accuracies": 0.768750011920929,
      "rewards/chosen": -6.5625,
      "rewards/margins": 1.7265625,
      "rewards/rejected": -8.3125,
      "step": 230
    },
    {
      "epoch": 0.9959925193694897,
      "step": 233,
      "total_flos": 0.0,
      "train_loss": 1.2805403189597724,
      "train_runtime": 5384.1773,
      "train_samples_per_second": 11.121,
      "train_steps_per_second": 0.043
    }
  ],
  "logging_steps": 5,
  "max_steps": 233,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 1000000,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 0.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}