| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.0, |
| "eval_steps": 100, |
| "global_step": 478, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0, |
| "learning_rate": 2.083333333333333e-09, |
| "logits/chosen": -2.322427272796631, |
| "logits/rejected": -2.1875603199005127, |
| "logps/chosen": -316.933837890625, |
| "logps/rejected": -257.42218017578125, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.0, |
| "rewards/chosen": 0.0, |
| "rewards/margins": 0.0, |
| "rewards/rejected": 0.0, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 2.0833333333333335e-08, |
| "logits/chosen": -2.3630495071411133, |
| "logits/rejected": -2.31345272064209, |
| "logps/chosen": -246.93641662597656, |
| "logps/rejected": -213.21914672851562, |
| "loss": 0.6932, |
| "rewards/accuracies": 0.3819444477558136, |
| "rewards/chosen": -0.00016181336832232773, |
| "rewards/margins": -0.0003163775254506618, |
| "rewards/rejected": 0.00015456414257641882, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 4.166666666666667e-08, |
| "logits/chosen": -2.3567471504211426, |
| "logits/rejected": -2.3047866821289062, |
| "logps/chosen": -271.8526611328125, |
| "logps/rejected": -246.7681884765625, |
| "loss": 0.6932, |
| "rewards/accuracies": 0.550000011920929, |
| "rewards/chosen": 5.23998387507163e-05, |
| "rewards/margins": 0.0003248125431127846, |
| "rewards/rejected": -0.0002724127843976021, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 6.25e-08, |
| "logits/chosen": -2.4557652473449707, |
| "logits/rejected": -2.36832332611084, |
| "logps/chosen": -281.60369873046875, |
| "logps/rejected": -273.43359375, |
| "loss": 0.693, |
| "rewards/accuracies": 0.512499988079071, |
| "rewards/chosen": 0.0001846836385084316, |
| "rewards/margins": 0.0006770413601770997, |
| "rewards/rejected": -0.0004923577653244138, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 8.333333333333334e-08, |
| "logits/chosen": -2.3590247631073, |
| "logits/rejected": -2.282857894897461, |
| "logps/chosen": -292.39532470703125, |
| "logps/rejected": -267.86248779296875, |
| "loss": 0.6924, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": 0.0016343205934390426, |
| "rewards/margins": 0.0015354175120592117, |
| "rewards/rejected": 9.890317596727982e-05, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 9.999466228837449e-08, |
| "logits/chosen": -2.364659070968628, |
| "logits/rejected": -2.304342269897461, |
| "logps/chosen": -307.89849853515625, |
| "logps/rejected": -300.7413330078125, |
| "loss": 0.6912, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": 0.0037586174439638853, |
| "rewards/margins": 0.0024325354024767876, |
| "rewards/rejected": 0.0013260821579024196, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 9.980796201712733e-08, |
| "logits/chosen": -2.3100428581237793, |
| "logits/rejected": -2.2260587215423584, |
| "logps/chosen": -253.73074340820312, |
| "logps/rejected": -225.9529266357422, |
| "loss": 0.689, |
| "rewards/accuracies": 0.6499999761581421, |
| "rewards/chosen": 0.0078201899304986, |
| "rewards/margins": 0.007128429599106312, |
| "rewards/rejected": 0.0006917613791301847, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 9.935551471796358e-08, |
| "logits/chosen": -2.366241455078125, |
| "logits/rejected": -2.286762237548828, |
| "logps/chosen": -277.2716369628906, |
| "logps/rejected": -247.9556121826172, |
| "loss": 0.687, |
| "rewards/accuracies": 0.6812499761581421, |
| "rewards/chosen": 0.013750630430877209, |
| "rewards/margins": 0.012628299184143543, |
| "rewards/rejected": 0.0011223324108868837, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 9.863973439298597e-08, |
| "logits/chosen": -2.3527064323425293, |
| "logits/rejected": -2.297341823577881, |
| "logps/chosen": -265.49774169921875, |
| "logps/rejected": -272.723876953125, |
| "loss": 0.6841, |
| "rewards/accuracies": 0.71875, |
| "rewards/chosen": 0.02027452178299427, |
| "rewards/margins": 0.014950519427657127, |
| "rewards/rejected": 0.005324001424014568, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 9.766444003992702e-08, |
| "logits/chosen": -2.3454525470733643, |
| "logits/rejected": -2.252537727355957, |
| "logps/chosen": -262.5143127441406, |
| "logps/rejected": -250.82666015625, |
| "loss": 0.6804, |
| "rewards/accuracies": 0.737500011920929, |
| "rewards/chosen": 0.025243505835533142, |
| "rewards/margins": 0.027552824467420578, |
| "rewards/rejected": -0.0023093195632100105, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 9.643483527614371e-08, |
| "logits/chosen": -2.3724794387817383, |
| "logits/rejected": -2.27921724319458, |
| "logps/chosen": -279.7447204589844, |
| "logps/rejected": -261.43951416015625, |
| "loss": 0.6773, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": 0.033204130828380585, |
| "rewards/margins": 0.02967449091374874, |
| "rewards/rejected": 0.0035296380519866943, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.21, |
| "eval_logits/chosen": -2.403949499130249, |
| "eval_logits/rejected": -2.356935739517212, |
| "eval_logps/chosen": -259.5997314453125, |
| "eval_logps/rejected": -262.78692626953125, |
| "eval_loss": 0.6766896843910217, |
| "eval_rewards/accuracies": 0.69140625, |
| "eval_rewards/chosen": 0.028116336092352867, |
| "eval_rewards/margins": 0.03584778681397438, |
| "eval_rewards/rejected": -0.007731448858976364, |
| "eval_runtime": 126.1594, |
| "eval_samples_per_second": 15.853, |
| "eval_steps_per_second": 0.254, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 9.495748057506749e-08, |
| "logits/chosen": -2.4030067920684814, |
| "logits/rejected": -2.322254180908203, |
| "logps/chosen": -312.4465637207031, |
| "logps/rejected": -271.04425048828125, |
| "loss": 0.6742, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": 0.030206363648176193, |
| "rewards/margins": 0.04333298280835152, |
| "rewards/rejected": -0.0131266163662076, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 9.324025826323994e-08, |
| "logits/chosen": -2.3183374404907227, |
| "logits/rejected": -2.2286434173583984, |
| "logps/chosen": -277.7622985839844, |
| "logps/rejected": -234.4459686279297, |
| "loss": 0.6678, |
| "rewards/accuracies": 0.71875, |
| "rewards/chosen": 0.009455936029553413, |
| "rewards/margins": 0.05889623612165451, |
| "rewards/rejected": -0.04944029822945595, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.27, |
| "learning_rate": 9.12923304646902e-08, |
| "logits/chosen": -2.3176040649414062, |
| "logits/rejected": -2.2900288105010986, |
| "logps/chosen": -262.83428955078125, |
| "logps/rejected": -294.60919189453125, |
| "loss": 0.6682, |
| "rewards/accuracies": 0.668749988079071, |
| "rewards/chosen": -0.00789323914796114, |
| "rewards/margins": 0.05458872765302658, |
| "rewards/rejected": -0.062481969594955444, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.29, |
| "learning_rate": 8.912409021703912e-08, |
| "logits/chosen": -2.4203720092773438, |
| "logits/rejected": -2.3540279865264893, |
| "logps/chosen": -292.56103515625, |
| "logps/rejected": -261.5688171386719, |
| "loss": 0.6617, |
| "rewards/accuracies": 0.7562500238418579, |
| "rewards/chosen": 0.0033874516375362873, |
| "rewards/margins": 0.07644981890916824, |
| "rewards/rejected": -0.07306236028671265, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.31, |
| "learning_rate": 8.67471060201467e-08, |
| "logits/chosen": -2.361544609069824, |
| "logits/rejected": -2.2634243965148926, |
| "logps/chosen": -287.91455078125, |
| "logps/rejected": -245.2080078125, |
| "loss": 0.6515, |
| "rewards/accuracies": 0.6625000238418579, |
| "rewards/chosen": -0.019349873065948486, |
| "rewards/margins": 0.09038561582565308, |
| "rewards/rejected": -0.10973550379276276, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.33, |
| "learning_rate": 8.417406011315998e-08, |
| "logits/chosen": -2.306546449661255, |
| "logits/rejected": -2.24609375, |
| "logps/chosen": -287.05523681640625, |
| "logps/rejected": -282.81243896484375, |
| "loss": 0.6442, |
| "rewards/accuracies": 0.675000011920929, |
| "rewards/chosen": -0.023438425734639168, |
| "rewards/margins": 0.12676861882209778, |
| "rewards/rejected": -0.150207057595253, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.36, |
| "learning_rate": 8.141868080927996e-08, |
| "logits/chosen": -2.3783812522888184, |
| "logits/rejected": -2.2990164756774902, |
| "logps/chosen": -247.52914428710938, |
| "logps/rejected": -240.2356719970703, |
| "loss": 0.6507, |
| "rewards/accuracies": 0.6625000238418579, |
| "rewards/chosen": -0.01242762990295887, |
| "rewards/margins": 0.09276925027370453, |
| "rewards/rejected": -0.10519689321517944, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.38, |
| "learning_rate": 7.849566924927082e-08, |
| "logits/chosen": -2.4011363983154297, |
| "logits/rejected": -2.3967764377593994, |
| "logps/chosen": -297.4763488769531, |
| "logps/rejected": -298.1854553222656, |
| "loss": 0.6404, |
| "rewards/accuracies": 0.6937500238418579, |
| "rewards/chosen": -0.04231880232691765, |
| "rewards/margins": 0.14232759177684784, |
| "rewards/rejected": -0.18464641273021698, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.4, |
| "learning_rate": 7.542062096451305e-08, |
| "logits/chosen": -2.433202028274536, |
| "logits/rejected": -2.340076446533203, |
| "logps/chosen": -288.4952697753906, |
| "logps/rejected": -292.8149719238281, |
| "loss": 0.6296, |
| "rewards/accuracies": 0.7749999761581421, |
| "rewards/chosen": -0.06186067312955856, |
| "rewards/margins": 0.15901610255241394, |
| "rewards/rejected": -0.2208767831325531, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.42, |
| "learning_rate": 7.22099426680959e-08, |
| "logits/chosen": -2.3200743198394775, |
| "logits/rejected": -2.2398030757904053, |
| "logps/chosen": -289.97857666015625, |
| "logps/rejected": -268.5564270019531, |
| "loss": 0.6286, |
| "rewards/accuracies": 0.6499999761581421, |
| "rewards/chosen": -0.10072056204080582, |
| "rewards/margins": 0.14450570940971375, |
| "rewards/rejected": -0.24522623419761658, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.42, |
| "eval_logits/chosen": -2.4022796154022217, |
| "eval_logits/rejected": -2.3535399436950684, |
| "eval_logps/chosen": -267.7723693847656, |
| "eval_logps/rejected": -285.21490478515625, |
| "eval_loss": 0.6292469501495361, |
| "eval_rewards/accuracies": 0.7109375, |
| "eval_rewards/chosen": -0.053610093891620636, |
| "eval_rewards/margins": 0.17840130627155304, |
| "eval_rewards/rejected": -0.23201137781143188, |
| "eval_runtime": 125.7898, |
| "eval_samples_per_second": 15.9, |
| "eval_steps_per_second": 0.254, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.44, |
| "learning_rate": 6.888076471790423e-08, |
| "logits/chosen": -2.3633980751037598, |
| "logits/rejected": -2.2721266746520996, |
| "logps/chosen": -278.57403564453125, |
| "logps/rejected": -257.5547180175781, |
| "loss": 0.6275, |
| "rewards/accuracies": 0.675000011920929, |
| "rewards/chosen": -0.019214138388633728, |
| "rewards/margins": 0.17581240832805634, |
| "rewards/rejected": -0.19502654671669006, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.46, |
| "learning_rate": 6.545084971874738e-08, |
| "logits/chosen": -2.2926197052001953, |
| "logits/rejected": -2.265472650527954, |
| "logps/chosen": -281.14739990234375, |
| "logps/rejected": -269.4004821777344, |
| "loss": 0.6304, |
| "rewards/accuracies": 0.731249988079071, |
| "rewards/chosen": -0.06231166049838066, |
| "rewards/margins": 0.19198919832706451, |
| "rewards/rejected": -0.2543008327484131, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.48, |
| "learning_rate": 6.193849775117709e-08, |
| "logits/chosen": -2.2452166080474854, |
| "logits/rejected": -2.2042713165283203, |
| "logps/chosen": -279.60284423828125, |
| "logps/rejected": -316.6097717285156, |
| "loss": 0.6223, |
| "rewards/accuracies": 0.706250011920929, |
| "rewards/chosen": -0.08384998142719269, |
| "rewards/margins": 0.22490167617797852, |
| "rewards/rejected": -0.3087516725063324, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.5, |
| "learning_rate": 5.836244873263989e-08, |
| "logits/chosen": -2.2598187923431396, |
| "logits/rejected": -2.182457685470581, |
| "logps/chosen": -253.05990600585938, |
| "logps/rejected": -265.4845275878906, |
| "loss": 0.6227, |
| "rewards/accuracies": 0.65625, |
| "rewards/chosen": -0.039119161665439606, |
| "rewards/margins": 0.20275244116783142, |
| "rewards/rejected": -0.24187159538269043, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.52, |
| "learning_rate": 5.474178243190913e-08, |
| "logits/chosen": -2.3356399536132812, |
| "logits/rejected": -2.2523715496063232, |
| "logps/chosen": -272.73431396484375, |
| "logps/rejected": -282.30047607421875, |
| "loss": 0.6136, |
| "rewards/accuracies": 0.675000011920929, |
| "rewards/chosen": -0.065894715487957, |
| "rewards/margins": 0.19611124694347382, |
| "rewards/rejected": -0.2620059847831726, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.54, |
| "learning_rate": 5.10958166702634e-08, |
| "logits/chosen": -2.310091018676758, |
| "logits/rejected": -2.2063992023468018, |
| "logps/chosen": -281.9244384765625, |
| "logps/rejected": -267.5443420410156, |
| "loss": 0.6078, |
| "rewards/accuracies": 0.737500011920929, |
| "rewards/chosen": -0.0009437998523935676, |
| "rewards/margins": 0.26818323135375977, |
| "rewards/rejected": -0.2691270709037781, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.56, |
| "learning_rate": 4.744400425255165e-08, |
| "logits/chosen": -2.290621519088745, |
| "logits/rejected": -2.2106773853302, |
| "logps/chosen": -281.05450439453125, |
| "logps/rejected": -294.99713134765625, |
| "loss": 0.61, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": -0.0785941556096077, |
| "rewards/margins": 0.2625763416290283, |
| "rewards/rejected": -0.34117045998573303, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.59, |
| "learning_rate": 4.3805829178062595e-08, |
| "logits/chosen": -2.3119542598724365, |
| "logits/rejected": -2.2208571434020996, |
| "logps/chosen": -286.83673095703125, |
| "logps/rejected": -274.24700927734375, |
| "loss": 0.611, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": -0.13735605776309967, |
| "rewards/margins": 0.2302558869123459, |
| "rewards/rejected": -0.36761194467544556, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.61, |
| "learning_rate": 4.020070268495843e-08, |
| "logits/chosen": -2.2847678661346436, |
| "logits/rejected": -2.236466884613037, |
| "logps/chosen": -294.72039794921875, |
| "logps/rejected": -302.5251770019531, |
| "loss": 0.6138, |
| "rewards/accuracies": 0.65625, |
| "rewards/chosen": -0.08734361827373505, |
| "rewards/margins": 0.2090953290462494, |
| "rewards/rejected": -0.29643893241882324, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.63, |
| "learning_rate": 3.6647859682920356e-08, |
| "logits/chosen": -2.302154064178467, |
| "logits/rejected": -2.2005538940429688, |
| "logps/chosen": -304.13336181640625, |
| "logps/rejected": -333.745849609375, |
| "loss": 0.6161, |
| "rewards/accuracies": 0.668749988079071, |
| "rewards/chosen": -0.08611747622489929, |
| "rewards/margins": 0.18742723762989044, |
| "rewards/rejected": -0.27354469895362854, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.63, |
| "eval_logits/chosen": -2.3264002799987793, |
| "eval_logits/rejected": -2.2759318351745605, |
| "eval_logps/chosen": -270.8907470703125, |
| "eval_logps/rejected": -295.5617370605469, |
| "eval_loss": 0.606550931930542, |
| "eval_rewards/accuracies": 0.71875, |
| "eval_rewards/chosen": -0.08479367196559906, |
| "eval_rewards/margins": 0.25068604946136475, |
| "eval_rewards/rejected": -0.335479736328125, |
| "eval_runtime": 126.491, |
| "eval_samples_per_second": 15.811, |
| "eval_steps_per_second": 0.253, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.65, |
| "learning_rate": 3.316625612658315e-08, |
| "logits/chosen": -2.3386969566345215, |
| "logits/rejected": -2.2128827571868896, |
| "logps/chosen": -283.69805908203125, |
| "logps/rejected": -254.1611328125, |
| "loss": 0.6035, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": -0.10745751857757568, |
| "rewards/margins": 0.25038108229637146, |
| "rewards/rejected": -0.35783863067626953, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.67, |
| "learning_rate": 2.9774467877315317e-08, |
| "logits/chosen": -2.2364108562469482, |
| "logits/rejected": -2.154195785522461, |
| "logps/chosen": -254.5322723388672, |
| "logps/rejected": -277.83294677734375, |
| "loss": 0.5964, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.08545304834842682, |
| "rewards/margins": 0.3306066691875458, |
| "rewards/rejected": -0.4160597324371338, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.69, |
| "learning_rate": 2.6490591592961574e-08, |
| "logits/chosen": -2.321842908859253, |
| "logits/rejected": -2.27044677734375, |
| "logps/chosen": -293.48895263671875, |
| "logps/rejected": -288.6790466308594, |
| "loss": 0.5983, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.08705135434865952, |
| "rewards/margins": 0.27239271998405457, |
| "rewards/rejected": -0.3594440817832947, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.71, |
| "learning_rate": 2.3332148174343254e-08, |
| "logits/chosen": -2.2017993927001953, |
| "logits/rejected": -2.1690361499786377, |
| "logps/chosen": -269.119873046875, |
| "logps/rejected": -278.8016662597656, |
| "loss": 0.6085, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.09265842288732529, |
| "rewards/margins": 0.21494929492473602, |
| "rewards/rejected": -0.3076077103614807, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.73, |
| "learning_rate": 2.031598928367147e-08, |
| "logits/chosen": -2.225857734680176, |
| "logits/rejected": -2.1034350395202637, |
| "logps/chosen": -263.73974609375, |
| "logps/rejected": -271.5313415527344, |
| "loss": 0.5993, |
| "rewards/accuracies": 0.7124999761581421, |
| "rewards/chosen": -0.1507432460784912, |
| "rewards/margins": 0.24868826568126678, |
| "rewards/rejected": -0.3994315266609192, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.75, |
| "learning_rate": 1.7458207433638223e-08, |
| "logits/chosen": -2.311649799346924, |
| "logits/rejected": -2.1799566745758057, |
| "logps/chosen": -327.11041259765625, |
| "logps/rejected": -300.2535095214844, |
| "loss": 0.6032, |
| "rewards/accuracies": 0.668749988079071, |
| "rewards/chosen": -0.09650352597236633, |
| "rewards/margins": 0.29801806807518005, |
| "rewards/rejected": -0.394521564245224, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.77, |
| "learning_rate": 1.4774050126898163e-08, |
| "logits/chosen": -2.3074421882629395, |
| "logits/rejected": -2.187077283859253, |
| "logps/chosen": -319.43560791015625, |
| "logps/rejected": -296.7154846191406, |
| "loss": 0.5996, |
| "rewards/accuracies": 0.7437499761581421, |
| "rewards/chosen": -0.09320107102394104, |
| "rewards/margins": 0.314532995223999, |
| "rewards/rejected": -0.40773409605026245, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.79, |
| "learning_rate": 1.2277838504044869e-08, |
| "logits/chosen": -2.2485527992248535, |
| "logits/rejected": -2.1728129386901855, |
| "logps/chosen": -296.4371643066406, |
| "logps/rejected": -318.8984680175781, |
| "loss": 0.5896, |
| "rewards/accuracies": 0.6937500238418579, |
| "rewards/chosen": -0.1758451759815216, |
| "rewards/margins": 0.26139548420906067, |
| "rewards/rejected": -0.4372406005859375, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.82, |
| "learning_rate": 9.982890934129379e-09, |
| "logits/chosen": -2.2180557250976562, |
| "logits/rejected": -2.164605140686035, |
| "logps/chosen": -286.80487060546875, |
| "logps/rejected": -304.9507141113281, |
| "loss": 0.603, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -0.18554511666297913, |
| "rewards/margins": 0.17823219299316406, |
| "rewards/rejected": -0.3637773096561432, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.84, |
| "learning_rate": 7.901451955398791e-09, |
| "logits/chosen": -2.188835382461548, |
| "logits/rejected": -2.1698737144470215, |
| "logps/chosen": -331.4312744140625, |
| "logps/rejected": -333.14471435546875, |
| "loss": 0.5908, |
| "rewards/accuracies": 0.71875, |
| "rewards/chosen": -0.13692975044250488, |
| "rewards/margins": 0.26922523975372314, |
| "rewards/rejected": -0.40615496039390564, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.84, |
| "eval_logits/chosen": -2.3026418685913086, |
| "eval_logits/rejected": -2.251879930496216, |
| "eval_logps/chosen": -272.7594299316406, |
| "eval_logps/rejected": -300.471435546875, |
| "eval_loss": 0.6002275347709656, |
| "eval_rewards/accuracies": 0.72265625, |
| "eval_rewards/chosen": -0.10348068922758102, |
| "eval_rewards/margins": 0.2810956835746765, |
| "eval_rewards/rejected": -0.38457638025283813, |
| "eval_runtime": 129.5587, |
| "eval_samples_per_second": 15.437, |
| "eval_steps_per_second": 0.247, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.86, |
| "learning_rate": 6.044626945386894e-09, |
| "logits/chosen": -2.229055881500244, |
| "logits/rejected": -2.1533703804016113, |
| "logps/chosen": -271.81182861328125, |
| "logps/rejected": -285.35748291015625, |
| "loss": 0.61, |
| "rewards/accuracies": 0.731249988079071, |
| "rewards/chosen": -0.13526737689971924, |
| "rewards/margins": 0.255347341299057, |
| "rewards/rejected": -0.39061471819877625, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.88, |
| "learning_rate": 4.422322868919937e-09, |
| "logits/chosen": -2.2021594047546387, |
| "logits/rejected": -2.1868810653686523, |
| "logps/chosen": -285.3330383300781, |
| "logps/rejected": -319.97686767578125, |
| "loss": 0.6048, |
| "rewards/accuracies": 0.6937500238418579, |
| "rewards/chosen": -0.09117873013019562, |
| "rewards/margins": 0.24966660141944885, |
| "rewards/rejected": -0.34084534645080566, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.9, |
| "learning_rate": 3.043195420172878e-09, |
| "logits/chosen": -2.312375545501709, |
| "logits/rejected": -2.2687058448791504, |
| "logps/chosen": -306.77484130859375, |
| "logps/rejected": -292.4010314941406, |
| "loss": 0.6081, |
| "rewards/accuracies": 0.71875, |
| "rewards/chosen": -0.10561072826385498, |
| "rewards/margins": 0.275703489780426, |
| "rewards/rejected": -0.381314218044281, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.92, |
| "learning_rate": 1.914602840795848e-09, |
| "logits/chosen": -2.260124921798706, |
| "logits/rejected": -2.2002854347229004, |
| "logps/chosen": -271.2167053222656, |
| "logps/rejected": -286.7249755859375, |
| "loss": 0.6024, |
| "rewards/accuracies": 0.668749988079071, |
| "rewards/chosen": -0.13646569848060608, |
| "rewards/margins": 0.19637010991573334, |
| "rewards/rejected": -0.3328357934951782, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.94, |
| "learning_rate": 1.0425666605112514e-09, |
| "logits/chosen": -2.1782705783843994, |
| "logits/rejected": -2.1199605464935303, |
| "logps/chosen": -253.34414672851562, |
| "logps/rejected": -260.60638427734375, |
| "loss": 0.6119, |
| "rewards/accuracies": 0.6937500238418579, |
| "rewards/chosen": -0.09690654277801514, |
| "rewards/margins": 0.21923665702342987, |
| "rewards/rejected": -0.3161432147026062, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.96, |
| "learning_rate": 4.317395696473214e-10, |
| "logits/chosen": -2.216618299484253, |
| "logits/rejected": -2.1176908016204834, |
| "logps/chosen": -253.55648803710938, |
| "logps/rejected": -267.17169189453125, |
| "loss": 0.593, |
| "rewards/accuracies": 0.731249988079071, |
| "rewards/chosen": -0.1348879039287567, |
| "rewards/margins": 0.2610943615436554, |
| "rewards/rejected": -0.3959822356700897, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.98, |
| "learning_rate": 8.538059502214978e-11, |
| "logits/chosen": -2.3197951316833496, |
| "logits/rejected": -2.164881706237793, |
| "logps/chosen": -302.24468994140625, |
| "logps/rejected": -304.28656005859375, |
| "loss": 0.588, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -0.046127352863550186, |
| "rewards/margins": 0.3802485764026642, |
| "rewards/rejected": -0.42637595534324646, |
| "step": 470 |
| }, |
| { |
| "epoch": 1.0, |
| "step": 478, |
| "total_flos": 0.0, |
| "train_loss": 0.6323943896273688, |
| "train_runtime": 8575.3767, |
| "train_samples_per_second": 7.129, |
| "train_steps_per_second": 0.056 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 478, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 1000, |
| "total_flos": 0.0, |
| "train_batch_size": 8, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|