| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.9979517674264948, |
| "eval_steps": 500, |
| "global_step": 472, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.002114304592005286, |
| "grad_norm": 1.5018059015274048, |
| "learning_rate": 2.083333333333333e-08, |
| "logits/chosen": -0.3466828167438507, |
| "logits/rejected": -0.30099987983703613, |
| "logps/chosen": -0.9345186948776245, |
| "logps/rejected": -0.9117153882980347, |
| "loss": 1.4889, |
| "rewards/accuracies": 0.53125, |
| "rewards/chosen": -1.869037389755249, |
| "rewards/margins": -0.04560665041208267, |
| "rewards/rejected": -1.8234307765960693, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.004228609184010572, |
| "grad_norm": 0.8093975186347961, |
| "learning_rate": 4.166666666666666e-08, |
| "logits/chosen": -0.4310421049594879, |
| "logits/rejected": -0.39132067561149597, |
| "logps/chosen": -0.8198825716972351, |
| "logps/rejected": -0.8644211888313293, |
| "loss": 1.376, |
| "rewards/accuracies": 0.515625, |
| "rewards/chosen": -1.6397651433944702, |
| "rewards/margins": 0.08907715976238251, |
| "rewards/rejected": -1.7288423776626587, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.006342913776015857, |
| "grad_norm": 0.5377389788627625, |
| "learning_rate": 6.25e-08, |
| "logits/chosen": -0.46692028641700745, |
| "logits/rejected": -0.4649256467819214, |
| "logps/chosen": -0.9087910652160645, |
| "logps/rejected": -0.9648240804672241, |
| "loss": 1.3404, |
| "rewards/accuracies": 0.5703125, |
| "rewards/chosen": -1.817582130432129, |
| "rewards/margins": 0.11206617206335068, |
| "rewards/rejected": -1.9296481609344482, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.008457218368021144, |
| "grad_norm": 0.3221875727176666, |
| "learning_rate": 8.333333333333333e-08, |
| "logits/chosen": -0.416828453540802, |
| "logits/rejected": -0.3584724962711334, |
| "logps/chosen": -0.7818898558616638, |
| "logps/rejected": -0.8170815110206604, |
| "loss": 1.3806, |
| "rewards/accuracies": 0.484375, |
| "rewards/chosen": -1.5637797117233276, |
| "rewards/margins": 0.07038339227437973, |
| "rewards/rejected": -1.6341630220413208, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.010571522960026428, |
| "grad_norm": 0.64655601978302, |
| "learning_rate": 1.0416666666666667e-07, |
| "logits/chosen": -0.376886248588562, |
| "logits/rejected": -0.3516141474246979, |
| "logps/chosen": -0.8814125061035156, |
| "logps/rejected": -1.0214396715164185, |
| "loss": 1.2741, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -1.7628250122070312, |
| "rewards/margins": 0.28005433082580566, |
| "rewards/rejected": -2.042879343032837, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.012685827552031714, |
| "grad_norm": 0.4775894582271576, |
| "learning_rate": 1.25e-07, |
| "logits/chosen": -0.4757865369319916, |
| "logits/rejected": -0.4498941898345947, |
| "logps/chosen": -0.8962199687957764, |
| "logps/rejected": -0.9462199807167053, |
| "loss": 1.364, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -1.7924399375915527, |
| "rewards/margins": 0.10000008344650269, |
| "rewards/rejected": -1.8924399614334106, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.014800132144037, |
| "grad_norm": 1.2459568977355957, |
| "learning_rate": 1.4583333333333335e-07, |
| "logits/chosen": -0.38895344734191895, |
| "logits/rejected": -0.38165366649627686, |
| "logps/chosen": -0.9025766253471375, |
| "logps/rejected": -0.9465017318725586, |
| "loss": 1.3898, |
| "rewards/accuracies": 0.59375, |
| "rewards/chosen": -1.805153250694275, |
| "rewards/margins": 0.0878501906991005, |
| "rewards/rejected": -1.8930034637451172, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.016914436736042288, |
| "grad_norm": 0.6195729374885559, |
| "learning_rate": 1.6666666666666665e-07, |
| "logits/chosen": -0.3964853286743164, |
| "logits/rejected": -0.377862811088562, |
| "logps/chosen": -0.9054160118103027, |
| "logps/rejected": -0.9605879187583923, |
| "loss": 1.3821, |
| "rewards/accuracies": 0.4765625, |
| "rewards/chosen": -1.8108320236206055, |
| "rewards/margins": 0.1103438138961792, |
| "rewards/rejected": -1.9211758375167847, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.019028741328047574, |
| "grad_norm": 1.2074137926101685, |
| "learning_rate": 1.875e-07, |
| "logits/chosen": -0.3729037344455719, |
| "logits/rejected": -0.38143450021743774, |
| "logps/chosen": -0.9328653216362, |
| "logps/rejected": -0.9905799627304077, |
| "loss": 1.3754, |
| "rewards/accuracies": 0.5078125, |
| "rewards/chosen": -1.8657306432724, |
| "rewards/margins": 0.11542946100234985, |
| "rewards/rejected": -1.9811599254608154, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.021143045920052856, |
| "grad_norm": 0.2867220640182495, |
| "learning_rate": 2.0833333333333333e-07, |
| "logits/chosen": -0.4263336658477783, |
| "logits/rejected": -0.42903271317481995, |
| "logps/chosen": -0.8979260325431824, |
| "logps/rejected": -0.9078099727630615, |
| "loss": 1.4438, |
| "rewards/accuracies": 0.515625, |
| "rewards/chosen": -1.7958520650863647, |
| "rewards/margins": 0.019767940044403076, |
| "rewards/rejected": -1.815619945526123, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.023257350512058142, |
| "grad_norm": 0.8363026976585388, |
| "learning_rate": 2.2916666666666663e-07, |
| "logits/chosen": -0.3374914526939392, |
| "logits/rejected": -0.32399696111679077, |
| "logps/chosen": -0.8886098861694336, |
| "logps/rejected": -0.9484556317329407, |
| "loss": 1.3422, |
| "rewards/accuracies": 0.546875, |
| "rewards/chosen": -1.7772197723388672, |
| "rewards/margins": 0.11969132721424103, |
| "rewards/rejected": -1.8969112634658813, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.025371655104063428, |
| "grad_norm": 0.5406804084777832, |
| "learning_rate": 2.5e-07, |
| "logits/chosen": -0.42844679951667786, |
| "logits/rejected": -0.37984615564346313, |
| "logps/chosen": -0.861629843711853, |
| "logps/rejected": -0.8968492150306702, |
| "loss": 1.3922, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -1.723259687423706, |
| "rewards/margins": 0.07043875753879547, |
| "rewards/rejected": -1.7936984300613403, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.027485959696068714, |
| "grad_norm": 0.9919329285621643, |
| "learning_rate": 2.708333333333333e-07, |
| "logits/chosen": -0.36495402455329895, |
| "logits/rejected": -0.3249490261077881, |
| "logps/chosen": -0.8502095937728882, |
| "logps/rejected": -0.8470643758773804, |
| "loss": 1.4334, |
| "rewards/accuracies": 0.5078125, |
| "rewards/chosen": -1.7004191875457764, |
| "rewards/margins": -0.006290358491241932, |
| "rewards/rejected": -1.6941287517547607, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.029600264288074, |
| "grad_norm": 0.5477162003517151, |
| "learning_rate": 2.916666666666667e-07, |
| "logits/chosen": -0.4155704081058502, |
| "logits/rejected": -0.39535820484161377, |
| "logps/chosen": -1.0430240631103516, |
| "logps/rejected": -1.1318373680114746, |
| "loss": 1.3533, |
| "rewards/accuracies": 0.6015625, |
| "rewards/chosen": -2.086048126220703, |
| "rewards/margins": 0.17762640118598938, |
| "rewards/rejected": -2.263674736022949, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.031714568880079286, |
| "grad_norm": 0.26530712842941284, |
| "learning_rate": 3.1249999999999997e-07, |
| "logits/chosen": -0.4810572564601898, |
| "logits/rejected": -0.42454615235328674, |
| "logps/chosen": -0.8741041421890259, |
| "logps/rejected": -0.9494178295135498, |
| "loss": 1.3655, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -1.7482082843780518, |
| "rewards/margins": 0.15062758326530457, |
| "rewards/rejected": -1.8988356590270996, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.033828873472084575, |
| "grad_norm": 0.9272629618644714, |
| "learning_rate": 3.333333333333333e-07, |
| "logits/chosen": -0.4440098702907562, |
| "logits/rejected": -0.3930297791957855, |
| "logps/chosen": -0.8473359942436218, |
| "logps/rejected": -0.9369213581085205, |
| "loss": 1.3248, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -1.6946719884872437, |
| "rewards/margins": 0.17917080223560333, |
| "rewards/rejected": -1.873842716217041, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.03594317806408986, |
| "grad_norm": 0.5912418961524963, |
| "learning_rate": 3.541666666666667e-07, |
| "logits/chosen": -0.3838099539279938, |
| "logits/rejected": -0.3507584035396576, |
| "logps/chosen": -0.8888350129127502, |
| "logps/rejected": -0.9361770749092102, |
| "loss": 1.383, |
| "rewards/accuracies": 0.4921875, |
| "rewards/chosen": -1.7776700258255005, |
| "rewards/margins": 0.0946839451789856, |
| "rewards/rejected": -1.8723541498184204, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.03805748265609515, |
| "grad_norm": 0.6536504030227661, |
| "learning_rate": 3.75e-07, |
| "logits/chosen": -0.3581697940826416, |
| "logits/rejected": -0.3620460629463196, |
| "logps/chosen": -0.8519617319107056, |
| "logps/rejected": -0.9022184610366821, |
| "loss": 1.3841, |
| "rewards/accuracies": 0.5078125, |
| "rewards/chosen": -1.7039234638214111, |
| "rewards/margins": 0.10051343590021133, |
| "rewards/rejected": -1.8044369220733643, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.04017178724810043, |
| "grad_norm": 0.3433632552623749, |
| "learning_rate": 3.958333333333333e-07, |
| "logits/chosen": -0.37887442111968994, |
| "logits/rejected": -0.37543320655822754, |
| "logps/chosen": -0.9464104175567627, |
| "logps/rejected": -1.0017329454421997, |
| "loss": 1.3649, |
| "rewards/accuracies": 0.5703125, |
| "rewards/chosen": -1.8928208351135254, |
| "rewards/margins": 0.11064518243074417, |
| "rewards/rejected": -2.0034658908843994, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.04228609184010571, |
| "grad_norm": 0.9764007329940796, |
| "learning_rate": 4.1666666666666667e-07, |
| "logits/chosen": -0.44110679626464844, |
| "logits/rejected": -0.4280649721622467, |
| "logps/chosen": -0.9046768546104431, |
| "logps/rejected": -1.0464633703231812, |
| "loss": 1.2592, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -1.8093537092208862, |
| "rewards/margins": 0.2835729420185089, |
| "rewards/rejected": -2.0929267406463623, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.044400396432111, |
| "grad_norm": 1.8563830852508545, |
| "learning_rate": 4.375e-07, |
| "logits/chosen": -0.45183491706848145, |
| "logits/rejected": -0.42935287952423096, |
| "logps/chosen": -0.9043138027191162, |
| "logps/rejected": -0.9462392926216125, |
| "loss": 1.3784, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -1.8086276054382324, |
| "rewards/margins": 0.08385094255208969, |
| "rewards/rejected": -1.892478585243225, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.046514701024116284, |
| "grad_norm": 1.3473299741744995, |
| "learning_rate": 4.5833333333333327e-07, |
| "logits/chosen": -0.37855517864227295, |
| "logits/rejected": -0.34429043531417847, |
| "logps/chosen": -0.9284683465957642, |
| "logps/rejected": -0.9454050064086914, |
| "loss": 1.4346, |
| "rewards/accuracies": 0.5234375, |
| "rewards/chosen": -1.8569366931915283, |
| "rewards/margins": 0.03387312963604927, |
| "rewards/rejected": -1.8908100128173828, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.04862900561612157, |
| "grad_norm": 0.940831184387207, |
| "learning_rate": 4.791666666666667e-07, |
| "logits/chosen": -0.39172160625457764, |
| "logits/rejected": -0.3695780634880066, |
| "logps/chosen": -0.9314202666282654, |
| "logps/rejected": -1.020229697227478, |
| "loss": 1.3322, |
| "rewards/accuracies": 0.5703125, |
| "rewards/chosen": -1.8628405332565308, |
| "rewards/margins": 0.17761869728565216, |
| "rewards/rejected": -2.040459394454956, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.050743310208126856, |
| "grad_norm": 0.5783158540725708, |
| "learning_rate": 5e-07, |
| "logits/chosen": -0.4958629608154297, |
| "logits/rejected": -0.4257377088069916, |
| "logps/chosen": -0.9379237294197083, |
| "logps/rejected": -0.9415461421012878, |
| "loss": 1.441, |
| "rewards/accuracies": 0.515625, |
| "rewards/chosen": -1.8758474588394165, |
| "rewards/margins": 0.0072449808940291405, |
| "rewards/rejected": -1.8830922842025757, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.052857614800132145, |
| "grad_norm": 1.4209853410720825, |
| "learning_rate": 5.208333333333334e-07, |
| "logits/chosen": -0.36407172679901123, |
| "logits/rejected": -0.3331725299358368, |
| "logps/chosen": -0.9192589521408081, |
| "logps/rejected": -0.9595308899879456, |
| "loss": 1.3994, |
| "rewards/accuracies": 0.5078125, |
| "rewards/chosen": -1.8385179042816162, |
| "rewards/margins": 0.080544114112854, |
| "rewards/rejected": -1.9190617799758911, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.05497191939213743, |
| "grad_norm": 0.6310216188430786, |
| "learning_rate": 5.416666666666666e-07, |
| "logits/chosen": -0.41772690415382385, |
| "logits/rejected": -0.36565953493118286, |
| "logps/chosen": -0.8052878379821777, |
| "logps/rejected": -0.8673746585845947, |
| "loss": 1.3356, |
| "rewards/accuracies": 0.5859375, |
| "rewards/chosen": -1.6105756759643555, |
| "rewards/margins": 0.12417369335889816, |
| "rewards/rejected": -1.7347493171691895, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.05708622398414272, |
| "grad_norm": 1.2933462858200073, |
| "learning_rate": 5.625e-07, |
| "logits/chosen": -0.4482795000076294, |
| "logits/rejected": -0.39409321546554565, |
| "logps/chosen": -0.8339261412620544, |
| "logps/rejected": -0.8675202131271362, |
| "loss": 1.3739, |
| "rewards/accuracies": 0.4921875, |
| "rewards/chosen": -1.6678522825241089, |
| "rewards/margins": 0.06718815863132477, |
| "rewards/rejected": -1.7350404262542725, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.059200528576148, |
| "grad_norm": 0.5808025002479553, |
| "learning_rate": 5.833333333333334e-07, |
| "logits/chosen": -0.37116044759750366, |
| "logits/rejected": -0.3478051722049713, |
| "logps/chosen": -0.8950318694114685, |
| "logps/rejected": -0.9756672978401184, |
| "loss": 1.3505, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -1.790063738822937, |
| "rewards/margins": 0.1612708568572998, |
| "rewards/rejected": -1.9513345956802368, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.06131483316815329, |
| "grad_norm": 1.0569533109664917, |
| "learning_rate": 6.041666666666666e-07, |
| "logits/chosen": -0.421148419380188, |
| "logits/rejected": -0.38443076610565186, |
| "logps/chosen": -0.8021283745765686, |
| "logps/rejected": -0.8370179533958435, |
| "loss": 1.3916, |
| "rewards/accuracies": 0.46875, |
| "rewards/chosen": -1.6042567491531372, |
| "rewards/margins": 0.06977920234203339, |
| "rewards/rejected": -1.674035906791687, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.06342913776015857, |
| "grad_norm": 0.42577147483825684, |
| "learning_rate": 6.249999999999999e-07, |
| "logits/chosen": -0.4429818391799927, |
| "logits/rejected": -0.3524704575538635, |
| "logps/chosen": -0.8916822671890259, |
| "logps/rejected": -0.8985542058944702, |
| "loss": 1.4321, |
| "rewards/accuracies": 0.5234375, |
| "rewards/chosen": -1.7833645343780518, |
| "rewards/margins": 0.01374388113617897, |
| "rewards/rejected": -1.7971084117889404, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.06554344235216386, |
| "grad_norm": 1.0056904554367065, |
| "learning_rate": 6.458333333333333e-07, |
| "logits/chosen": -0.376451700925827, |
| "logits/rejected": -0.342519074678421, |
| "logps/chosen": -0.9038617014884949, |
| "logps/rejected": -0.953092634677887, |
| "loss": 1.398, |
| "rewards/accuracies": 0.4765625, |
| "rewards/chosen": -1.8077234029769897, |
| "rewards/margins": 0.09846188127994537, |
| "rewards/rejected": -1.906185269355774, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.06765774694416915, |
| "grad_norm": 0.5494012236595154, |
| "learning_rate": 6.666666666666666e-07, |
| "logits/chosen": -0.3459138870239258, |
| "logits/rejected": -0.3590989410877228, |
| "logps/chosen": -0.8274999260902405, |
| "logps/rejected": -0.8776509761810303, |
| "loss": 1.363, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -1.654999852180481, |
| "rewards/margins": 0.1003020703792572, |
| "rewards/rejected": -1.7553019523620605, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.06977205153617443, |
| "grad_norm": 0.693267822265625, |
| "learning_rate": 6.875e-07, |
| "logits/chosen": -0.40053680539131165, |
| "logits/rejected": -0.37323904037475586, |
| "logps/chosen": -0.8255244493484497, |
| "logps/rejected": -0.8658804893493652, |
| "loss": 1.3712, |
| "rewards/accuracies": 0.578125, |
| "rewards/chosen": -1.6510488986968994, |
| "rewards/margins": 0.08071210980415344, |
| "rewards/rejected": -1.7317609786987305, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.07188635612817972, |
| "grad_norm": 2.213238000869751, |
| "learning_rate": 7.083333333333334e-07, |
| "logits/chosen": -0.40097948908805847, |
| "logits/rejected": -0.38190510869026184, |
| "logps/chosen": -0.9122671484947205, |
| "logps/rejected": -0.9549552798271179, |
| "loss": 1.36, |
| "rewards/accuracies": 0.5390625, |
| "rewards/chosen": -1.824534296989441, |
| "rewards/margins": 0.0853763073682785, |
| "rewards/rejected": -1.9099105596542358, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.074000660720185, |
| "grad_norm": 0.6859830021858215, |
| "learning_rate": 7.291666666666666e-07, |
| "logits/chosen": -0.42501094937324524, |
| "logits/rejected": -0.42549416422843933, |
| "logps/chosen": -1.0008373260498047, |
| "logps/rejected": -1.1157118082046509, |
| "loss": 1.3294, |
| "rewards/accuracies": 0.5546875, |
| "rewards/chosen": -2.0016746520996094, |
| "rewards/margins": 0.22974897921085358, |
| "rewards/rejected": -2.2314236164093018, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.0761149653121903, |
| "grad_norm": 0.6468721628189087, |
| "learning_rate": 7.5e-07, |
| "logits/chosen": -0.36494994163513184, |
| "logits/rejected": -0.30433908104896545, |
| "logps/chosen": -0.9062094688415527, |
| "logps/rejected": -0.920263409614563, |
| "loss": 1.4312, |
| "rewards/accuracies": 0.4921875, |
| "rewards/chosen": -1.8124189376831055, |
| "rewards/margins": 0.02810765616595745, |
| "rewards/rejected": -1.840526819229126, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.07822926990419557, |
| "grad_norm": 0.5085556507110596, |
| "learning_rate": 7.708333333333333e-07, |
| "logits/chosen": -0.4677881598472595, |
| "logits/rejected": -0.456132709980011, |
| "logps/chosen": -1.0101865530014038, |
| "logps/rejected": -1.0429682731628418, |
| "loss": 1.4132, |
| "rewards/accuracies": 0.5234375, |
| "rewards/chosen": -2.0203731060028076, |
| "rewards/margins": 0.06556359678506851, |
| "rewards/rejected": -2.0859365463256836, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.08034357449620086, |
| "grad_norm": 0.23813335597515106, |
| "learning_rate": 7.916666666666666e-07, |
| "logits/chosen": -0.3991190791130066, |
| "logits/rejected": -0.3664044141769409, |
| "logps/chosen": -0.9578174352645874, |
| "logps/rejected": -0.9229263067245483, |
| "loss": 1.4824, |
| "rewards/accuracies": 0.4609375, |
| "rewards/chosen": -1.9156348705291748, |
| "rewards/margins": -0.06978224962949753, |
| "rewards/rejected": -1.8458526134490967, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.08245787908820615, |
| "grad_norm": 0.587037980556488, |
| "learning_rate": 8.125e-07, |
| "logits/chosen": -0.37554049491882324, |
| "logits/rejected": -0.36305734515190125, |
| "logps/chosen": -0.8503091931343079, |
| "logps/rejected": -0.864615261554718, |
| "loss": 1.4086, |
| "rewards/accuracies": 0.4296875, |
| "rewards/chosen": -1.7006183862686157, |
| "rewards/margins": 0.028611989691853523, |
| "rewards/rejected": -1.729230523109436, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.08457218368021142, |
| "grad_norm": 0.4172501862049103, |
| "learning_rate": 8.333333333333333e-07, |
| "logits/chosen": -0.4405443072319031, |
| "logits/rejected": -0.41723060607910156, |
| "logps/chosen": -0.8502858877182007, |
| "logps/rejected": -0.9114271402359009, |
| "loss": 1.3446, |
| "rewards/accuracies": 0.546875, |
| "rewards/chosen": -1.7005717754364014, |
| "rewards/margins": 0.12228240817785263, |
| "rewards/rejected": -1.8228542804718018, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.08668648827221671, |
| "grad_norm": 0.9275372624397278, |
| "learning_rate": 8.541666666666666e-07, |
| "logits/chosen": -0.4200601577758789, |
| "logits/rejected": -0.3478623628616333, |
| "logps/chosen": -0.892408013343811, |
| "logps/rejected": -0.9276402592658997, |
| "loss": 1.3887, |
| "rewards/accuracies": 0.46875, |
| "rewards/chosen": -1.784816026687622, |
| "rewards/margins": 0.07046431303024292, |
| "rewards/rejected": -1.8552805185317993, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.088800792864222, |
| "grad_norm": 0.7317383289337158, |
| "learning_rate": 8.75e-07, |
| "logits/chosen": -0.37675267457962036, |
| "logits/rejected": -0.33540332317352295, |
| "logps/chosen": -0.7866061925888062, |
| "logps/rejected": -0.824250340461731, |
| "loss": 1.3837, |
| "rewards/accuracies": 0.5078125, |
| "rewards/chosen": -1.5732123851776123, |
| "rewards/margins": 0.07528844475746155, |
| "rewards/rejected": -1.648500680923462, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.09091509745622729, |
| "grad_norm": 0.9452736973762512, |
| "learning_rate": 8.958333333333334e-07, |
| "logits/chosen": -0.4662383198738098, |
| "logits/rejected": -0.4447881579399109, |
| "logps/chosen": -0.9490666389465332, |
| "logps/rejected": -1.0112388134002686, |
| "loss": 1.3412, |
| "rewards/accuracies": 0.5390625, |
| "rewards/chosen": -1.8981332778930664, |
| "rewards/margins": 0.12434446066617966, |
| "rewards/rejected": -2.022477626800537, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.09302940204823257, |
| "grad_norm": 0.2848323881626129, |
| "learning_rate": 9.166666666666665e-07, |
| "logits/chosen": -0.41404005885124207, |
| "logits/rejected": -0.3944583535194397, |
| "logps/chosen": -0.8224930167198181, |
| "logps/rejected": -0.8416361808776855, |
| "loss": 1.4027, |
| "rewards/accuracies": 0.609375, |
| "rewards/chosen": -1.6449860334396362, |
| "rewards/margins": 0.038286346942186356, |
| "rewards/rejected": -1.683272361755371, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.09514370664023786, |
| "grad_norm": 0.7165678143501282, |
| "learning_rate": 9.374999999999999e-07, |
| "logits/chosen": -0.40475326776504517, |
| "logits/rejected": -0.3559921383857727, |
| "logps/chosen": -0.8070214986801147, |
| "logps/rejected": -0.8993593454360962, |
| "loss": 1.3148, |
| "rewards/accuracies": 0.53125, |
| "rewards/chosen": -1.6140429973602295, |
| "rewards/margins": 0.18467575311660767, |
| "rewards/rejected": -1.7987186908721924, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.09725801123224315, |
| "grad_norm": 0.4779021739959717, |
| "learning_rate": 9.583333333333334e-07, |
| "logits/chosen": -0.4171525835990906, |
| "logits/rejected": -0.42166149616241455, |
| "logps/chosen": -0.7872560024261475, |
| "logps/rejected": -0.8496187925338745, |
| "loss": 1.3356, |
| "rewards/accuracies": 0.5546875, |
| "rewards/chosen": -1.574512004852295, |
| "rewards/margins": 0.12472567707300186, |
| "rewards/rejected": -1.699237585067749, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.09937231582424844, |
| "grad_norm": 0.7870219349861145, |
| "learning_rate": 9.791666666666667e-07, |
| "logits/chosen": -0.3734116554260254, |
| "logits/rejected": -0.32778748869895935, |
| "logps/chosen": -0.7842286825180054, |
| "logps/rejected": -0.8161548972129822, |
| "loss": 1.3647, |
| "rewards/accuracies": 0.5078125, |
| "rewards/chosen": -1.5684573650360107, |
| "rewards/margins": 0.06385258585214615, |
| "rewards/rejected": -1.6323097944259644, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.10148662041625371, |
| "grad_norm": 0.2597256600856781, |
| "learning_rate": 1e-06, |
| "logits/chosen": -0.4355677664279938, |
| "logits/rejected": -0.38983187079429626, |
| "logps/chosen": -0.8787693977355957, |
| "logps/rejected": -0.9383041262626648, |
| "loss": 1.35, |
| "rewards/accuracies": 0.5546875, |
| "rewards/chosen": -1.7575387954711914, |
| "rewards/margins": 0.11906948685646057, |
| "rewards/rejected": -1.8766082525253296, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.103600925008259, |
| "grad_norm": 0.9942799210548401, |
| "learning_rate": 9.999862751990697e-07, |
| "logits/chosen": -0.4244321882724762, |
| "logits/rejected": -0.4366786777973175, |
| "logps/chosen": -0.7910157442092896, |
| "logps/rejected": -0.8630884885787964, |
| "loss": 1.3166, |
| "rewards/accuracies": 0.5546875, |
| "rewards/chosen": -1.582031488418579, |
| "rewards/margins": 0.14414538443088531, |
| "rewards/rejected": -1.7261769771575928, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.10571522960026429, |
| "grad_norm": 0.5333903431892395, |
| "learning_rate": 9.999451015497595e-07, |
| "logits/chosen": -0.389942467212677, |
| "logits/rejected": -0.36674585938453674, |
| "logps/chosen": -0.7312074899673462, |
| "logps/rejected": -0.7289648652076721, |
| "loss": 1.4225, |
| "rewards/accuracies": 0.4765625, |
| "rewards/chosen": -1.4624149799346924, |
| "rewards/margins": -0.004485193639993668, |
| "rewards/rejected": -1.4579297304153442, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.10782953419226958, |
| "grad_norm": 0.5712242722511292, |
| "learning_rate": 9.9987648131247e-07, |
| "logits/chosen": -0.4622853994369507, |
| "logits/rejected": -0.3728552460670471, |
| "logps/chosen": -0.8764299750328064, |
| "logps/rejected": -0.869678795337677, |
| "loss": 1.4542, |
| "rewards/accuracies": 0.53125, |
| "rewards/chosen": -1.7528599500656128, |
| "rewards/margins": -0.013502337038516998, |
| "rewards/rejected": -1.739357590675354, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.10994383878427486, |
| "grad_norm": 0.2586441934108734, |
| "learning_rate": 9.99780418254397e-07, |
| "logits/chosen": -0.37249019742012024, |
| "logits/rejected": -0.3998304605484009, |
| "logps/chosen": -0.8435611724853516, |
| "logps/rejected": -0.9359882473945618, |
| "loss": 1.3057, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -1.6871223449707031, |
| "rewards/margins": 0.18485431373119354, |
| "rewards/rejected": -1.8719764947891235, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.11205814337628014, |
| "grad_norm": 1.0829113721847534, |
| "learning_rate": 9.996569176493268e-07, |
| "logits/chosen": -0.47697725892066956, |
| "logits/rejected": -0.4208195209503174, |
| "logps/chosen": -0.8014968037605286, |
| "logps/rejected": -0.8703804612159729, |
| "loss": 1.3523, |
| "rewards/accuracies": 0.5703125, |
| "rewards/chosen": -1.6029936075210571, |
| "rewards/margins": 0.1377674788236618, |
| "rewards/rejected": -1.7407609224319458, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.11417244796828543, |
| "grad_norm": 0.5523208379745483, |
| "learning_rate": 9.995059862773438e-07, |
| "logits/chosen": -0.40533363819122314, |
| "logits/rejected": -0.36801978945732117, |
| "logps/chosen": -0.7641825675964355, |
| "logps/rejected": -0.8168596029281616, |
| "loss": 1.3692, |
| "rewards/accuracies": 0.53125, |
| "rewards/chosen": -1.528365135192871, |
| "rewards/margins": 0.10535416752099991, |
| "rewards/rejected": -1.6337192058563232, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.11628675256029072, |
| "grad_norm": 0.614101767539978, |
| "learning_rate": 9.993276324244605e-07, |
| "logits/chosen": -0.4476906955242157, |
| "logits/rejected": -0.40396648645401, |
| "logps/chosen": -0.8706808090209961, |
| "logps/rejected": -0.9221430420875549, |
| "loss": 1.3787, |
| "rewards/accuracies": 0.515625, |
| "rewards/chosen": -1.7413616180419922, |
| "rewards/margins": 0.10292442888021469, |
| "rewards/rejected": -1.8442860841751099, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.118401057152296, |
| "grad_norm": 0.3428778052330017, |
| "learning_rate": 9.991218658821608e-07, |
| "logits/chosen": -0.31709593534469604, |
| "logits/rejected": -0.2760937213897705, |
| "logps/chosen": -0.842248797416687, |
| "logps/rejected": -0.8068034648895264, |
| "loss": 1.498, |
| "rewards/accuracies": 0.4375, |
| "rewards/chosen": -1.684497594833374, |
| "rewards/margins": -0.07089066505432129, |
| "rewards/rejected": -1.6136069297790527, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.12051536174430129, |
| "grad_norm": 0.6877723932266235, |
| "learning_rate": 9.988886979468643e-07, |
| "logits/chosen": -0.41800016164779663, |
| "logits/rejected": -0.4011584222316742, |
| "logps/chosen": -0.7845420837402344, |
| "logps/rejected": -0.834447979927063, |
| "loss": 1.3491, |
| "rewards/accuracies": 0.546875, |
| "rewards/chosen": -1.5690841674804688, |
| "rewards/margins": 0.09981165081262589, |
| "rewards/rejected": -1.668895959854126, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.12262966633630658, |
| "grad_norm": 0.9649701714515686, |
| "learning_rate": 9.98628141419305e-07, |
| "logits/chosen": -0.4253537058830261, |
| "logits/rejected": -0.4305458962917328, |
| "logps/chosen": -0.86476731300354, |
| "logps/rejected": -0.9080386161804199, |
| "loss": 1.3639, |
| "rewards/accuracies": 0.515625, |
| "rewards/chosen": -1.72953462600708, |
| "rewards/margins": 0.08654248714447021, |
| "rewards/rejected": -1.8160772323608398, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.12474397092831185, |
| "grad_norm": 1.3779780864715576, |
| "learning_rate": 9.98340210603829e-07, |
| "logits/chosen": -0.39970022439956665, |
| "logits/rejected": -0.441428005695343, |
| "logps/chosen": -0.8662775158882141, |
| "logps/rejected": -0.9646260738372803, |
| "loss": 1.3001, |
| "rewards/accuracies": 0.6484375, |
| "rewards/chosen": -1.7325550317764282, |
| "rewards/margins": 0.19669723510742188, |
| "rewards/rejected": -1.9292521476745605, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.12685827552031714, |
| "grad_norm": 0.5366966724395752, |
| "learning_rate": 9.980249213076084e-07, |
| "logits/chosen": -0.37770116329193115, |
| "logits/rejected": -0.35231757164001465, |
| "logps/chosen": -0.8165755867958069, |
| "logps/rejected": -0.8619179129600525, |
| "loss": 1.3699, |
| "rewards/accuracies": 0.578125, |
| "rewards/chosen": -1.6331511735916138, |
| "rewards/margins": 0.09068439900875092, |
| "rewards/rejected": -1.723835825920105, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.12897258011232243, |
| "grad_norm": 0.36810922622680664, |
| "learning_rate": 9.976822908397748e-07, |
| "logits/chosen": -0.4224976897239685, |
| "logits/rejected": -0.41758257150650024, |
| "logps/chosen": -0.8445641994476318, |
| "logps/rejected": -0.9393664598464966, |
| "loss": 1.3193, |
| "rewards/accuracies": 0.5390625, |
| "rewards/chosen": -1.6891283988952637, |
| "rewards/margins": 0.18960458040237427, |
| "rewards/rejected": -1.8787329196929932, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.13108688470432772, |
| "grad_norm": 0.6838279366493225, |
| "learning_rate": 9.97312338010468e-07, |
| "logits/chosen": -0.4168627858161926, |
| "logits/rejected": -0.36115381121635437, |
| "logps/chosen": -0.8370552659034729, |
| "logps/rejected": -0.8352169394493103, |
| "loss": 1.4284, |
| "rewards/accuracies": 0.5703125, |
| "rewards/chosen": -1.6741105318069458, |
| "rewards/margins": -0.0036766715347766876, |
| "rewards/rejected": -1.6704338788986206, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.133201189296333, |
| "grad_norm": 0.39330533146858215, |
| "learning_rate": 9.969150831298037e-07, |
| "logits/chosen": -0.4558233618736267, |
| "logits/rejected": -0.4025765061378479, |
| "logps/chosen": -0.826255738735199, |
| "logps/rejected": -0.894213080406189, |
| "loss": 1.3485, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -1.652511477470398, |
| "rewards/margins": 0.13591471314430237, |
| "rewards/rejected": -1.788426160812378, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.1353154938883383, |
| "grad_norm": 0.6055929660797119, |
| "learning_rate": 9.964905480067584e-07, |
| "logits/chosen": -0.459463506937027, |
| "logits/rejected": -0.42943331599235535, |
| "logps/chosen": -0.7901928424835205, |
| "logps/rejected": -0.7964221239089966, |
| "loss": 1.4057, |
| "rewards/accuracies": 0.484375, |
| "rewards/chosen": -1.580385684967041, |
| "rewards/margins": 0.012458762153983116, |
| "rewards/rejected": -1.5928442478179932, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.13742979848034356, |
| "grad_norm": 0.37883859872817993, |
| "learning_rate": 9.960387559479725e-07, |
| "logits/chosen": -0.4447207450866699, |
| "logits/rejected": -0.371269553899765, |
| "logps/chosen": -0.7863065004348755, |
| "logps/rejected": -0.7983666658401489, |
| "loss": 1.4202, |
| "rewards/accuracies": 0.4609375, |
| "rewards/chosen": -1.572613000869751, |
| "rewards/margins": 0.02412020042538643, |
| "rewards/rejected": -1.5967333316802979, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.13954410307234885, |
| "grad_norm": 0.31330156326293945, |
| "learning_rate": 9.955597317564703e-07, |
| "logits/chosen": -0.42059677839279175, |
| "logits/rejected": -0.37605100870132446, |
| "logps/chosen": -0.7669360637664795, |
| "logps/rejected": -0.8348797559738159, |
| "loss": 1.3368, |
| "rewards/accuracies": 0.5390625, |
| "rewards/chosen": -1.533872127532959, |
| "rewards/margins": 0.13588732481002808, |
| "rewards/rejected": -1.6697595119476318, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.14165840766435414, |
| "grad_norm": 0.4353170096874237, |
| "learning_rate": 9.950535017302983e-07, |
| "logits/chosen": -0.3897082805633545, |
| "logits/rejected": -0.38229796290397644, |
| "logps/chosen": -0.7249190807342529, |
| "logps/rejected": -0.7696882486343384, |
| "loss": 1.3511, |
| "rewards/accuracies": 0.5546875, |
| "rewards/chosen": -1.4498381614685059, |
| "rewards/margins": 0.08953814208507538, |
| "rewards/rejected": -1.5393764972686768, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.14377271225635943, |
| "grad_norm": 0.6724106669425964, |
| "learning_rate": 9.94520093661082e-07, |
| "logits/chosen": -0.3687596023082733, |
| "logits/rejected": -0.34222811460494995, |
| "logps/chosen": -0.7845972776412964, |
| "logps/rejected": -0.8308086395263672, |
| "loss": 1.3737, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -1.5691945552825928, |
| "rewards/margins": 0.09242270141839981, |
| "rewards/rejected": -1.6616172790527344, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.14588701684836472, |
| "grad_norm": 0.7312172651290894, |
| "learning_rate": 9.939595368324994e-07, |
| "logits/chosen": -0.4475817382335663, |
| "logits/rejected": -0.3975730538368225, |
| "logps/chosen": -0.7314785718917847, |
| "logps/rejected": -0.7924487590789795, |
| "loss": 1.3439, |
| "rewards/accuracies": 0.5390625, |
| "rewards/chosen": -1.4629571437835693, |
| "rewards/margins": 0.12194043397903442, |
| "rewards/rejected": -1.584897518157959, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.14800132144037, |
| "grad_norm": 0.301097571849823, |
| "learning_rate": 9.933718620186744e-07, |
| "logits/chosen": -0.402032732963562, |
| "logits/rejected": -0.3640722632408142, |
| "logps/chosen": -0.7727882862091064, |
| "logps/rejected": -0.8291516304016113, |
| "loss": 1.358, |
| "rewards/accuracies": 0.5546875, |
| "rewards/chosen": -1.545576572418213, |
| "rewards/margins": 0.11272668838500977, |
| "rewards/rejected": -1.6583032608032227, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.1501156260323753, |
| "grad_norm": 0.377835750579834, |
| "learning_rate": 9.92757101482486e-07, |
| "logits/chosen": -0.316825270652771, |
| "logits/rejected": -0.3245603144168854, |
| "logps/chosen": -0.7962774634361267, |
| "logps/rejected": -0.8610175848007202, |
| "loss": 1.3464, |
| "rewards/accuracies": 0.5859375, |
| "rewards/chosen": -1.5925549268722534, |
| "rewards/margins": 0.12948019802570343, |
| "rewards/rejected": -1.7220351696014404, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.1522299306243806, |
| "grad_norm": 0.84058678150177, |
| "learning_rate": 9.921152889737984e-07, |
| "logits/chosen": -0.4446060359477997, |
| "logits/rejected": -0.43160340189933777, |
| "logps/chosen": -0.7745426297187805, |
| "logps/rejected": -0.8286185264587402, |
| "loss": 1.3408, |
| "rewards/accuracies": 0.5703125, |
| "rewards/chosen": -1.549085259437561, |
| "rewards/margins": 0.10815180093050003, |
| "rewards/rejected": -1.6572370529174805, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.15434423521638585, |
| "grad_norm": 0.6970808506011963, |
| "learning_rate": 9.91446459727607e-07, |
| "logits/chosen": -0.4220297634601593, |
| "logits/rejected": -0.404453307390213, |
| "logps/chosen": -0.8769615888595581, |
| "logps/rejected": -0.9564313292503357, |
| "loss": 1.3423, |
| "rewards/accuracies": 0.6328125, |
| "rewards/chosen": -1.7539231777191162, |
| "rewards/margins": 0.15893957018852234, |
| "rewards/rejected": -1.9128626585006714, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.15645853980839114, |
| "grad_norm": 0.5900676250457764, |
| "learning_rate": 9.90750650462105e-07, |
| "logits/chosen": -0.41884100437164307, |
| "logits/rejected": -0.38551777601242065, |
| "logps/chosen": -0.814996063709259, |
| "logps/rejected": -0.8892688751220703, |
| "loss": 1.3325, |
| "rewards/accuracies": 0.5546875, |
| "rewards/chosen": -1.629992127418518, |
| "rewards/margins": 0.14854571223258972, |
| "rewards/rejected": -1.7785377502441406, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.15857284440039643, |
| "grad_norm": 0.7245749831199646, |
| "learning_rate": 9.900278993766668e-07, |
| "logits/chosen": -0.3451727330684662, |
| "logits/rejected": -0.3348972201347351, |
| "logps/chosen": -0.8788102865219116, |
| "logps/rejected": -0.9273182153701782, |
| "loss": 1.377, |
| "rewards/accuracies": 0.5078125, |
| "rewards/chosen": -1.7576205730438232, |
| "rewards/margins": 0.09701582789421082, |
| "rewards/rejected": -1.8546364307403564, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.16068714899240172, |
| "grad_norm": 0.3135383725166321, |
| "learning_rate": 9.89278246149752e-07, |
| "logits/chosen": -0.4140404760837555, |
| "logits/rejected": -0.38082340359687805, |
| "logps/chosen": -0.7513999342918396, |
| "logps/rejected": -0.7880118489265442, |
| "loss": 1.3921, |
| "rewards/accuracies": 0.53125, |
| "rewards/chosen": -1.5027998685836792, |
| "rewards/margins": 0.07322371751070023, |
| "rewards/rejected": -1.5760236978530884, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.162801453584407, |
| "grad_norm": 0.8594076633453369, |
| "learning_rate": 9.885017319367252e-07, |
| "logits/chosen": -0.35951656103134155, |
| "logits/rejected": -0.30456626415252686, |
| "logps/chosen": -0.7989844679832458, |
| "logps/rejected": -0.8145395517349243, |
| "loss": 1.4061, |
| "rewards/accuracies": 0.4765625, |
| "rewards/chosen": -1.5979689359664917, |
| "rewards/margins": 0.031110182404518127, |
| "rewards/rejected": -1.6290791034698486, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.1649157581764123, |
| "grad_norm": 0.4393538534641266, |
| "learning_rate": 9.876983993675989e-07, |
| "logits/chosen": -0.33119240403175354, |
| "logits/rejected": -0.31970253586769104, |
| "logps/chosen": -0.721772313117981, |
| "logps/rejected": -0.7868390679359436, |
| "loss": 1.3325, |
| "rewards/accuracies": 0.5390625, |
| "rewards/chosen": -1.443544626235962, |
| "rewards/margins": 0.13013358414173126, |
| "rewards/rejected": -1.5736781358718872, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.1670300627684176, |
| "grad_norm": 0.8017925024032593, |
| "learning_rate": 9.868682925446909e-07, |
| "logits/chosen": -0.3608989417552948, |
| "logits/rejected": -0.3497124910354614, |
| "logps/chosen": -0.8117240071296692, |
| "logps/rejected": -0.8506529331207275, |
| "loss": 1.3671, |
| "rewards/accuracies": 0.53125, |
| "rewards/chosen": -1.6234480142593384, |
| "rewards/margins": 0.07785768806934357, |
| "rewards/rejected": -1.701305866241455, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.16914436736042285, |
| "grad_norm": 0.5704994201660156, |
| "learning_rate": 9.860114570402054e-07, |
| "logits/chosen": -0.37441548705101013, |
| "logits/rejected": -0.32188406586647034, |
| "logps/chosen": -0.745419442653656, |
| "logps/rejected": -0.816170871257782, |
| "loss": 1.3315, |
| "rewards/accuracies": 0.484375, |
| "rewards/chosen": -1.490838885307312, |
| "rewards/margins": 0.1415030062198639, |
| "rewards/rejected": -1.632341742515564, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.17125867195242814, |
| "grad_norm": 3.0005106925964355, |
| "learning_rate": 9.85127939893729e-07, |
| "logits/chosen": -0.34791454672813416, |
| "logits/rejected": -0.32542383670806885, |
| "logps/chosen": -0.7547991275787354, |
| "logps/rejected": -0.7868378162384033, |
| "loss": 1.3742, |
| "rewards/accuracies": 0.515625, |
| "rewards/chosen": -1.5095982551574707, |
| "rewards/margins": 0.06407731771469116, |
| "rewards/rejected": -1.5736756324768066, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.17337297654443343, |
| "grad_norm": 0.5891271829605103, |
| "learning_rate": 9.842177896096493e-07, |
| "logits/chosen": -0.38649702072143555, |
| "logits/rejected": -0.36892226338386536, |
| "logps/chosen": -0.7556143999099731, |
| "logps/rejected": -0.82858806848526, |
| "loss": 1.3233, |
| "rewards/accuracies": 0.5703125, |
| "rewards/chosen": -1.5112287998199463, |
| "rewards/margins": 0.14594702422618866, |
| "rewards/rejected": -1.65717613697052, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.17548728113643872, |
| "grad_norm": 0.27861157059669495, |
| "learning_rate": 9.832810561544923e-07, |
| "logits/chosen": -0.38264670968055725, |
| "logits/rejected": -0.35908499360084534, |
| "logps/chosen": -0.7858557105064392, |
| "logps/rejected": -0.8571599721908569, |
| "loss": 1.3234, |
| "rewards/accuracies": 0.578125, |
| "rewards/chosen": -1.5717114210128784, |
| "rewards/margins": 0.1426086127758026, |
| "rewards/rejected": -1.7143199443817139, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.177601585728444, |
| "grad_norm": 0.8158763647079468, |
| "learning_rate": 9.823177909541793e-07, |
| "logits/chosen": -0.4076104760169983, |
| "logits/rejected": -0.3934200704097748, |
| "logps/chosen": -0.8089872002601624, |
| "logps/rejected": -0.8885407447814941, |
| "loss": 1.3476, |
| "rewards/accuracies": 0.5390625, |
| "rewards/chosen": -1.6179744005203247, |
| "rewards/margins": 0.15910708904266357, |
| "rewards/rejected": -1.7770814895629883, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.1797158903204493, |
| "grad_norm": 0.46008333563804626, |
| "learning_rate": 9.813280468912022e-07, |
| "logits/chosen": -0.33124151825904846, |
| "logits/rejected": -0.34535717964172363, |
| "logps/chosen": -0.733020544052124, |
| "logps/rejected": -0.8716557621955872, |
| "loss": 1.2807, |
| "rewards/accuracies": 0.5859375, |
| "rewards/chosen": -1.466041088104248, |
| "rewards/margins": 0.27727028727531433, |
| "rewards/rejected": -1.7433115243911743, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.18183019491245458, |
| "grad_norm": 0.3784334659576416, |
| "learning_rate": 9.80311878301722e-07, |
| "logits/chosen": -0.40713849663734436, |
| "logits/rejected": -0.3808574080467224, |
| "logps/chosen": -0.7063947319984436, |
| "logps/rejected": -0.7589148879051208, |
| "loss": 1.3501, |
| "rewards/accuracies": 0.515625, |
| "rewards/chosen": -1.4127894639968872, |
| "rewards/margins": 0.10504024475812912, |
| "rewards/rejected": -1.5178297758102417, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.18394449950445987, |
| "grad_norm": 0.6137813329696655, |
| "learning_rate": 9.792693409725853e-07, |
| "logits/chosen": -0.4119255542755127, |
| "logits/rejected": -0.44221603870391846, |
| "logps/chosen": -0.795850932598114, |
| "logps/rejected": -0.8925026059150696, |
| "loss": 1.2987, |
| "rewards/accuracies": 0.5859375, |
| "rewards/chosen": -1.591701865196228, |
| "rewards/margins": 0.19330324232578278, |
| "rewards/rejected": -1.7850052118301392, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.18605880409646514, |
| "grad_norm": 0.5354250073432922, |
| "learning_rate": 9.78200492138261e-07, |
| "logits/chosen": -0.3792279064655304, |
| "logits/rejected": -0.3789527714252472, |
| "logps/chosen": -0.7249161005020142, |
| "logps/rejected": -0.8088154792785645, |
| "loss": 1.3304, |
| "rewards/accuracies": 0.515625, |
| "rewards/chosen": -1.4498322010040283, |
| "rewards/margins": 0.16779886186122894, |
| "rewards/rejected": -1.617630958557129, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.18817310868847043, |
| "grad_norm": 0.6911243796348572, |
| "learning_rate": 9.771053904776995e-07, |
| "logits/chosen": -0.38837429881095886, |
| "logits/rejected": -0.36597418785095215, |
| "logps/chosen": -0.7528612017631531, |
| "logps/rejected": -0.7981135249137878, |
| "loss": 1.3481, |
| "rewards/accuracies": 0.6171875, |
| "rewards/chosen": -1.5057224035263062, |
| "rewards/margins": 0.09050464630126953, |
| "rewards/rejected": -1.5962270498275757, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.19028741328047571, |
| "grad_norm": 0.37110790610313416, |
| "learning_rate": 9.759840961111097e-07, |
| "logits/chosen": -0.3804919421672821, |
| "logits/rejected": -0.38750600814819336, |
| "logps/chosen": -0.8673248291015625, |
| "logps/rejected": -0.9381619691848755, |
| "loss": 1.3303, |
| "rewards/accuracies": 0.5859375, |
| "rewards/chosen": -1.734649658203125, |
| "rewards/margins": 0.14167429506778717, |
| "rewards/rejected": -1.876323938369751, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.192401717872481, |
| "grad_norm": 0.8033086657524109, |
| "learning_rate": 9.748366705966593e-07, |
| "logits/chosen": -0.3804866075515747, |
| "logits/rejected": -0.31055447459220886, |
| "logps/chosen": -0.7535511255264282, |
| "logps/rejected": -0.7824290990829468, |
| "loss": 1.3706, |
| "rewards/accuracies": 0.546875, |
| "rewards/chosen": -1.5071022510528564, |
| "rewards/margins": 0.057755980640649796, |
| "rewards/rejected": -1.5648581981658936, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.1945160224644863, |
| "grad_norm": 0.6934167742729187, |
| "learning_rate": 9.736631769270957e-07, |
| "logits/chosen": -0.443461149930954, |
| "logits/rejected": -0.4398806691169739, |
| "logps/chosen": -0.8123858571052551, |
| "logps/rejected": -0.8972252607345581, |
| "loss": 1.3464, |
| "rewards/accuracies": 0.578125, |
| "rewards/chosen": -1.6247717142105103, |
| "rewards/margins": 0.16967862844467163, |
| "rewards/rejected": -1.7944505214691162, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.19663032705649158, |
| "grad_norm": 1.140067458152771, |
| "learning_rate": 9.724636795262866e-07, |
| "logits/chosen": -0.43793433904647827, |
| "logits/rejected": -0.4402340352535248, |
| "logps/chosen": -0.8155819177627563, |
| "logps/rejected": -0.8659977912902832, |
| "loss": 1.3621, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -1.6311638355255127, |
| "rewards/margins": 0.10083187371492386, |
| "rewards/rejected": -1.7319955825805664, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.19874463164849687, |
| "grad_norm": 1.8303897380828857, |
| "learning_rate": 9.712382442456844e-07, |
| "logits/chosen": -0.34288379549980164, |
| "logits/rejected": -0.36632782220840454, |
| "logps/chosen": -0.7338054776191711, |
| "logps/rejected": -0.8537961840629578, |
| "loss": 1.2942, |
| "rewards/accuracies": 0.5859375, |
| "rewards/chosen": -1.4676109552383423, |
| "rewards/margins": 0.23998141288757324, |
| "rewards/rejected": -1.7075923681259155, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.20085893624050213, |
| "grad_norm": 0.34392252564430237, |
| "learning_rate": 9.6998693836071e-07, |
| "logits/chosen": -0.4381723999977112, |
| "logits/rejected": -0.4031081199645996, |
| "logps/chosen": -0.7130292057991028, |
| "logps/rejected": -0.7402217388153076, |
| "loss": 1.3766, |
| "rewards/accuracies": 0.5859375, |
| "rewards/chosen": -1.4260584115982056, |
| "rewards/margins": 0.0543849840760231, |
| "rewards/rejected": -1.4804434776306152, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.20297324083250742, |
| "grad_norm": 0.4129842519760132, |
| "learning_rate": 9.687098305670604e-07, |
| "logits/chosen": -0.39796924591064453, |
| "logits/rejected": -0.3476859927177429, |
| "logps/chosen": -0.7520885467529297, |
| "logps/rejected": -0.8058558702468872, |
| "loss": 1.3663, |
| "rewards/accuracies": 0.53125, |
| "rewards/chosen": -1.5041770935058594, |
| "rewards/margins": 0.10753461718559265, |
| "rewards/rejected": -1.6117117404937744, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.2050875454245127, |
| "grad_norm": 0.5054985284805298, |
| "learning_rate": 9.674069909769362e-07, |
| "logits/chosen": -0.3942393660545349, |
| "logits/rejected": -0.3627544045448303, |
| "logps/chosen": -0.733702540397644, |
| "logps/rejected": -0.781308650970459, |
| "loss": 1.342, |
| "rewards/accuracies": 0.5859375, |
| "rewards/chosen": -1.467405080795288, |
| "rewards/margins": 0.09521210938692093, |
| "rewards/rejected": -1.562617301940918, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.207201850016518, |
| "grad_norm": 0.6975870728492737, |
| "learning_rate": 9.66078491115194e-07, |
| "logits/chosen": -0.38557127118110657, |
| "logits/rejected": -0.3581204414367676, |
| "logps/chosen": -0.7359838485717773, |
| "logps/rejected": -0.7648134827613831, |
| "loss": 1.3841, |
| "rewards/accuracies": 0.5234375, |
| "rewards/chosen": -1.4719676971435547, |
| "rewards/margins": 0.057659298181533813, |
| "rewards/rejected": -1.5296269655227661, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.2093161546085233, |
| "grad_norm": 0.7575029730796814, |
| "learning_rate": 9.647244039154177e-07, |
| "logits/chosen": -0.3871467411518097, |
| "logits/rejected": -0.3941374123096466, |
| "logps/chosen": -0.6516871452331543, |
| "logps/rejected": -0.7066073417663574, |
| "loss": 1.3364, |
| "rewards/accuracies": 0.5390625, |
| "rewards/chosen": -1.3033742904663086, |
| "rewards/margins": 0.10984040796756744, |
| "rewards/rejected": -1.4132146835327148, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.21143045920052858, |
| "grad_norm": 1.3344250917434692, |
| "learning_rate": 9.633448037159166e-07, |
| "logits/chosen": -0.40887755155563354, |
| "logits/rejected": -0.41733911633491516, |
| "logps/chosen": -0.6978950500488281, |
| "logps/rejected": -0.793424129486084, |
| "loss": 1.3076, |
| "rewards/accuracies": 0.609375, |
| "rewards/chosen": -1.3957901000976562, |
| "rewards/margins": 0.1910584717988968, |
| "rewards/rejected": -1.586848258972168, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.21354476379253387, |
| "grad_norm": 0.8798456788063049, |
| "learning_rate": 9.619397662556433e-07, |
| "logits/chosen": -0.302534282207489, |
| "logits/rejected": -0.29954588413238525, |
| "logps/chosen": -0.719552755355835, |
| "logps/rejected": -0.7628123164176941, |
| "loss": 1.3699, |
| "rewards/accuracies": 0.5390625, |
| "rewards/chosen": -1.43910551071167, |
| "rewards/margins": 0.08651915192604065, |
| "rewards/rejected": -1.5256246328353882, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.21565906838453916, |
| "grad_norm": 0.8746365308761597, |
| "learning_rate": 9.605093686700353e-07, |
| "logits/chosen": -0.372263640165329, |
| "logits/rejected": -0.3714321255683899, |
| "logps/chosen": -0.6665956974029541, |
| "logps/rejected": -0.7361368536949158, |
| "loss": 1.3173, |
| "rewards/accuracies": 0.5546875, |
| "rewards/chosen": -1.3331913948059082, |
| "rewards/margins": 0.1390824019908905, |
| "rewards/rejected": -1.4722737073898315, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.21777337297654442, |
| "grad_norm": 0.8450930714607239, |
| "learning_rate": 9.590536894867812e-07, |
| "logits/chosen": -0.37228280305862427, |
| "logits/rejected": -0.37763556838035583, |
| "logps/chosen": -0.7425979375839233, |
| "logps/rejected": -0.7557005882263184, |
| "loss": 1.4085, |
| "rewards/accuracies": 0.5390625, |
| "rewards/chosen": -1.4851958751678467, |
| "rewards/margins": 0.026205357164144516, |
| "rewards/rejected": -1.5114011764526367, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.2198876775685497, |
| "grad_norm": 0.5075035691261292, |
| "learning_rate": 9.575728086215091e-07, |
| "logits/chosen": -0.4433964788913727, |
| "logits/rejected": -0.3782787024974823, |
| "logps/chosen": -0.7308244109153748, |
| "logps/rejected": -0.8043883442878723, |
| "loss": 1.3353, |
| "rewards/accuracies": 0.5703125, |
| "rewards/chosen": -1.4616488218307495, |
| "rewards/margins": 0.14712783694267273, |
| "rewards/rejected": -1.6087766885757446, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.222001982160555, |
| "grad_norm": 1.0270946025848389, |
| "learning_rate": 9.560668073733993e-07, |
| "logits/chosen": -0.3593980073928833, |
| "logits/rejected": -0.3159312903881073, |
| "logps/chosen": -0.757469892501831, |
| "logps/rejected": -0.8256179094314575, |
| "loss": 1.3289, |
| "rewards/accuracies": 0.5546875, |
| "rewards/chosen": -1.514939785003662, |
| "rewards/margins": 0.13629598915576935, |
| "rewards/rejected": -1.651235818862915, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.2241162867525603, |
| "grad_norm": 0.8390078544616699, |
| "learning_rate": 9.54535768420721e-07, |
| "logits/chosen": -0.3266332149505615, |
| "logits/rejected": -0.3008713722229004, |
| "logps/chosen": -0.7286102771759033, |
| "logps/rejected": -0.7803273797035217, |
| "loss": 1.3593, |
| "rewards/accuracies": 0.5078125, |
| "rewards/chosen": -1.4572205543518066, |
| "rewards/margins": 0.10343428701162338, |
| "rewards/rejected": -1.5606547594070435, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.22623059134456558, |
| "grad_norm": 1.130595088005066, |
| "learning_rate": 9.529797758162934e-07, |
| "logits/chosen": -0.36109817028045654, |
| "logits/rejected": -0.34797021746635437, |
| "logps/chosen": -0.7723361253738403, |
| "logps/rejected": -0.8873662352561951, |
| "loss": 1.2956, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -1.5446722507476807, |
| "rewards/margins": 0.23006024956703186, |
| "rewards/rejected": -1.7747324705123901, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.22834489593657087, |
| "grad_norm": 0.9399718642234802, |
| "learning_rate": 9.513989149828717e-07, |
| "logits/chosen": -0.3596777021884918, |
| "logits/rejected": -0.3660539388656616, |
| "logps/chosen": -0.7130635976791382, |
| "logps/rejected": -0.7378955483436584, |
| "loss": 1.3774, |
| "rewards/accuracies": 0.4765625, |
| "rewards/chosen": -1.4261271953582764, |
| "rewards/margins": 0.04966379329562187, |
| "rewards/rejected": -1.475791096687317, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.23045920052857616, |
| "grad_norm": 1.097594976425171, |
| "learning_rate": 9.49793272708457e-07, |
| "logits/chosen": -0.31783169507980347, |
| "logits/rejected": -0.3008044362068176, |
| "logps/chosen": -0.6933202147483826, |
| "logps/rejected": -0.7510000467300415, |
| "loss": 1.3498, |
| "rewards/accuracies": 0.5390625, |
| "rewards/chosen": -1.3866404294967651, |
| "rewards/margins": 0.1153596043586731, |
| "rewards/rejected": -1.502000093460083, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.23257350512058145, |
| "grad_norm": 1.096330165863037, |
| "learning_rate": 9.481629371415313e-07, |
| "logits/chosen": -0.3582899570465088, |
| "logits/rejected": -0.3120020031929016, |
| "logps/chosen": -0.817268431186676, |
| "logps/rejected": -0.8862374424934387, |
| "loss": 1.3349, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -1.634536862373352, |
| "rewards/margins": 0.13793781399726868, |
| "rewards/rejected": -1.7724748849868774, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.2346878097125867, |
| "grad_norm": 0.8261978626251221, |
| "learning_rate": 9.465079977862192e-07, |
| "logits/chosen": -0.41336673498153687, |
| "logits/rejected": -0.39544352889060974, |
| "logps/chosen": -0.7673372030258179, |
| "logps/rejected": -0.8331737518310547, |
| "loss": 1.3373, |
| "rewards/accuracies": 0.59375, |
| "rewards/chosen": -1.5346744060516357, |
| "rewards/margins": 0.1316729635000229, |
| "rewards/rejected": -1.6663475036621094, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.236802114304592, |
| "grad_norm": 0.5922806262969971, |
| "learning_rate": 9.448285454973737e-07, |
| "logits/chosen": -0.3224758207798004, |
| "logits/rejected": -0.3118049204349518, |
| "logps/chosen": -0.7584627866744995, |
| "logps/rejected": -0.8859898447990417, |
| "loss": 1.2731, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -1.516925573348999, |
| "rewards/margins": 0.2550540566444397, |
| "rewards/rejected": -1.7719796895980835, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.2389164188965973, |
| "grad_norm": 1.3172541856765747, |
| "learning_rate": 9.431246724755877e-07, |
| "logits/chosen": -0.4287208318710327, |
| "logits/rejected": -0.3984590172767639, |
| "logps/chosen": -0.7587048411369324, |
| "logps/rejected": -0.7860502004623413, |
| "loss": 1.3832, |
| "rewards/accuracies": 0.4765625, |
| "rewards/chosen": -1.5174096822738647, |
| "rewards/margins": 0.05469079315662384, |
| "rewards/rejected": -1.5721004009246826, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.24103072348860258, |
| "grad_norm": 0.7749882340431213, |
| "learning_rate": 9.413964722621337e-07, |
| "logits/chosen": -0.39085906744003296, |
| "logits/rejected": -0.3316206932067871, |
| "logps/chosen": -0.7035898566246033, |
| "logps/rejected": -0.7375759482383728, |
| "loss": 1.387, |
| "rewards/accuracies": 0.5546875, |
| "rewards/chosen": -1.4071797132492065, |
| "rewards/margins": 0.06797221302986145, |
| "rewards/rejected": -1.4751518964767456, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.24314502808060787, |
| "grad_norm": 1.0914056301116943, |
| "learning_rate": 9.396440397338272e-07, |
| "logits/chosen": -0.38826486468315125, |
| "logits/rejected": -0.35520774126052856, |
| "logps/chosen": -0.7385872602462769, |
| "logps/rejected": -0.7974889278411865, |
| "loss": 1.3477, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -1.4771745204925537, |
| "rewards/margins": 0.11780343949794769, |
| "rewards/rejected": -1.594977855682373, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.24525933267261316, |
| "grad_norm": 1.2966018915176392, |
| "learning_rate": 9.378674710978183e-07, |
| "logits/chosen": -0.36493802070617676, |
| "logits/rejected": -0.34763696789741516, |
| "logps/chosen": -0.6731826663017273, |
| "logps/rejected": -0.7645149827003479, |
| "loss": 1.3, |
| "rewards/accuracies": 0.5859375, |
| "rewards/chosen": -1.3463653326034546, |
| "rewards/margins": 0.18266455829143524, |
| "rewards/rejected": -1.5290299654006958, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.24737363726461845, |
| "grad_norm": 0.49401605129241943, |
| "learning_rate": 9.360668638863109e-07, |
| "logits/chosen": -0.40416795015335083, |
| "logits/rejected": -0.3815993070602417, |
| "logps/chosen": -0.719497799873352, |
| "logps/rejected": -0.7588324546813965, |
| "loss": 1.3621, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -1.438995599746704, |
| "rewards/margins": 0.07866920530796051, |
| "rewards/rejected": -1.517664909362793, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.2494879418566237, |
| "grad_norm": 1.0603238344192505, |
| "learning_rate": 9.342423169512071e-07, |
| "logits/chosen": -0.3857055604457855, |
| "logits/rejected": -0.3524513244628906, |
| "logps/chosen": -0.7373769283294678, |
| "logps/rejected": -0.7971038818359375, |
| "loss": 1.3358, |
| "rewards/accuracies": 0.59375, |
| "rewards/chosen": -1.4747538566589355, |
| "rewards/margins": 0.11945393681526184, |
| "rewards/rejected": -1.594207763671875, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.251602246448629, |
| "grad_norm": 0.9880490303039551, |
| "learning_rate": 9.323939304586804e-07, |
| "logits/chosen": -0.31455785036087036, |
| "logits/rejected": -0.3102484941482544, |
| "logps/chosen": -0.7276102900505066, |
| "logps/rejected": -0.7446941137313843, |
| "loss": 1.3928, |
| "rewards/accuracies": 0.546875, |
| "rewards/chosen": -1.4552205801010132, |
| "rewards/margins": 0.034167706966400146, |
| "rewards/rejected": -1.4893882274627686, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.2537165510406343, |
| "grad_norm": 1.0419566631317139, |
| "learning_rate": 9.305218058836776e-07, |
| "logits/chosen": -0.38093918561935425, |
| "logits/rejected": -0.3588898181915283, |
| "logps/chosen": -0.715582013130188, |
| "logps/rejected": -0.8271002769470215, |
| "loss": 1.2934, |
| "rewards/accuracies": 0.6015625, |
| "rewards/chosen": -1.431164026260376, |
| "rewards/margins": 0.22303667664527893, |
| "rewards/rejected": -1.654200553894043, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.2558308556326396, |
| "grad_norm": 0.657620370388031, |
| "learning_rate": 9.286260460043473e-07, |
| "logits/chosen": -0.45690783858299255, |
| "logits/rejected": -0.4082674980163574, |
| "logps/chosen": -0.6932571530342102, |
| "logps/rejected": -0.7631082534790039, |
| "loss": 1.3398, |
| "rewards/accuracies": 0.5546875, |
| "rewards/chosen": -1.3865143060684204, |
| "rewards/margins": 0.13970226049423218, |
| "rewards/rejected": -1.5262165069580078, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.25794516022464486, |
| "grad_norm": 0.983686089515686, |
| "learning_rate": 9.267067548963974e-07, |
| "logits/chosen": -0.40266987681388855, |
| "logits/rejected": -0.37586671113967896, |
| "logps/chosen": -0.7362720966339111, |
| "logps/rejected": -0.7538987398147583, |
| "loss": 1.4066, |
| "rewards/accuracies": 0.5390625, |
| "rewards/chosen": -1.4725441932678223, |
| "rewards/margins": 0.03525342047214508, |
| "rewards/rejected": -1.5077974796295166, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.26005946481665015, |
| "grad_norm": 1.0076361894607544, |
| "learning_rate": 9.24764037927381e-07, |
| "logits/chosen": -0.4461461007595062, |
| "logits/rejected": -0.40700826048851013, |
| "logps/chosen": -0.7206646800041199, |
| "logps/rejected": -0.7489192485809326, |
| "loss": 1.3759, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -1.4413293600082397, |
| "rewards/margins": 0.05650928616523743, |
| "rewards/rejected": -1.4978384971618652, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.26217376940865544, |
| "grad_norm": 0.933315098285675, |
| "learning_rate": 9.22798001750913e-07, |
| "logits/chosen": -0.3966676890850067, |
| "logits/rejected": -0.3572196960449219, |
| "logps/chosen": -0.7075096368789673, |
| "logps/rejected": -0.7406759262084961, |
| "loss": 1.3667, |
| "rewards/accuracies": 0.6171875, |
| "rewards/chosen": -1.4150192737579346, |
| "rewards/margins": 0.06633266806602478, |
| "rewards/rejected": -1.4813518524169922, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.26428807400066073, |
| "grad_norm": 0.6277392506599426, |
| "learning_rate": 9.20808754300814e-07, |
| "logits/chosen": -0.3555490970611572, |
| "logits/rejected": -0.35786163806915283, |
| "logps/chosen": -0.7549921274185181, |
| "logps/rejected": -0.832869291305542, |
| "loss": 1.3175, |
| "rewards/accuracies": 0.5546875, |
| "rewards/chosen": -1.5099842548370361, |
| "rewards/margins": 0.15575438737869263, |
| "rewards/rejected": -1.665738582611084, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.266402378592666, |
| "grad_norm": 0.7172744274139404, |
| "learning_rate": 9.18796404785185e-07, |
| "logits/chosen": -0.41230690479278564, |
| "logits/rejected": -0.39935630559921265, |
| "logps/chosen": -0.7129833698272705, |
| "logps/rejected": -0.7888559103012085, |
| "loss": 1.3167, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -1.425966739654541, |
| "rewards/margins": 0.15174514055252075, |
| "rewards/rejected": -1.577711820602417, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.2685166831846713, |
| "grad_norm": 1.2629508972167969, |
| "learning_rate": 9.16761063680412e-07, |
| "logits/chosen": -0.36754000186920166, |
| "logits/rejected": -0.3541562259197235, |
| "logps/chosen": -0.6992133855819702, |
| "logps/rejected": -0.7668892741203308, |
| "loss": 1.3735, |
| "rewards/accuracies": 0.578125, |
| "rewards/chosen": -1.3984267711639404, |
| "rewards/margins": 0.13535188138484955, |
| "rewards/rejected": -1.5337785482406616, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.2706309877766766, |
| "grad_norm": 0.7024405598640442, |
| "learning_rate": 9.147028427251009e-07, |
| "logits/chosen": -0.4014585018157959, |
| "logits/rejected": -0.40560898184776306, |
| "logps/chosen": -0.727234959602356, |
| "logps/rejected": -0.8070081472396851, |
| "loss": 1.3138, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -1.454469919204712, |
| "rewards/margins": 0.15954652428627014, |
| "rewards/rejected": -1.6140162944793701, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.2727452923686819, |
| "grad_norm": 1.6173532009124756, |
| "learning_rate": 9.126218549139433e-07, |
| "logits/chosen": -0.32572367787361145, |
| "logits/rejected": -0.3470613956451416, |
| "logps/chosen": -0.7555541396141052, |
| "logps/rejected": -0.8856738209724426, |
| "loss": 1.2461, |
| "rewards/accuracies": 0.6796875, |
| "rewards/chosen": -1.5111082792282104, |
| "rewards/margins": 0.26023951172828674, |
| "rewards/rejected": -1.7713476419448853, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.2748595969606871, |
| "grad_norm": 0.5878487229347229, |
| "learning_rate": 9.105182144915129e-07, |
| "logits/chosen": -0.39267170429229736, |
| "logits/rejected": -0.3448992967605591, |
| "logps/chosen": -0.6776289343833923, |
| "logps/rejected": -0.7530183792114258, |
| "loss": 1.3242, |
| "rewards/accuracies": 0.609375, |
| "rewards/chosen": -1.3552578687667847, |
| "rewards/margins": 0.15077897906303406, |
| "rewards/rejected": -1.5060367584228516, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.2769739015526924, |
| "grad_norm": 0.43264809250831604, |
| "learning_rate": 9.08392036945994e-07, |
| "logits/chosen": -0.39980950951576233, |
| "logits/rejected": -0.4247930645942688, |
| "logps/chosen": -0.7898982167243958, |
| "logps/rejected": -0.8856299519538879, |
| "loss": 1.3004, |
| "rewards/accuracies": 0.59375, |
| "rewards/chosen": -1.5797964334487915, |
| "rewards/margins": 0.19146347045898438, |
| "rewards/rejected": -1.7712599039077759, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.2790882061446977, |
| "grad_norm": 1.0348538160324097, |
| "learning_rate": 9.062434390028407e-07, |
| "logits/chosen": -0.35729700326919556, |
| "logits/rejected": -0.3265542984008789, |
| "logps/chosen": -0.7120587229728699, |
| "logps/rejected": -0.771691083908081, |
| "loss": 1.3374, |
| "rewards/accuracies": 0.59375, |
| "rewards/chosen": -1.4241174459457397, |
| "rewards/margins": 0.11926469206809998, |
| "rewards/rejected": -1.543382167816162, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.281202510736703, |
| "grad_norm": 2.0902225971221924, |
| "learning_rate": 9.04072538618369e-07, |
| "logits/chosen": -0.4942469298839569, |
| "logits/rejected": -0.48699846863746643, |
| "logps/chosen": -0.7882512211799622, |
| "logps/rejected": -0.8270165920257568, |
| "loss": 1.3715, |
| "rewards/accuracies": 0.5703125, |
| "rewards/chosen": -1.5765024423599243, |
| "rewards/margins": 0.07753071188926697, |
| "rewards/rejected": -1.6540331840515137, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.2833168153287083, |
| "grad_norm": 1.6436113119125366, |
| "learning_rate": 9.018794549732817e-07, |
| "logits/chosen": -0.41133156418800354, |
| "logits/rejected": -0.4146718382835388, |
| "logps/chosen": -0.779824435710907, |
| "logps/rejected": -0.9421006441116333, |
| "loss": 1.2521, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -1.559648871421814, |
| "rewards/margins": 0.324552446603775, |
| "rewards/rejected": -1.8842012882232666, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.28543111992071357, |
| "grad_norm": 0.8831859827041626, |
| "learning_rate": 8.996643084661244e-07, |
| "logits/chosen": -0.42452165484428406, |
| "logits/rejected": -0.3798604905605316, |
| "logps/chosen": -0.6499216556549072, |
| "logps/rejected": -0.7796702980995178, |
| "loss": 1.2581, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -1.2998433113098145, |
| "rewards/margins": 0.25949734449386597, |
| "rewards/rejected": -1.5593405961990356, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.28754542451271886, |
| "grad_norm": 0.8031218647956848, |
| "learning_rate": 8.974272207066767e-07, |
| "logits/chosen": -0.38131940364837646, |
| "logits/rejected": -0.3854255676269531, |
| "logps/chosen": -0.7026851773262024, |
| "logps/rejected": -0.762391209602356, |
| "loss": 1.3333, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -1.4053703546524048, |
| "rewards/margins": 0.11941206455230713, |
| "rewards/rejected": -1.524782419204712, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.28965972910472415, |
| "grad_norm": 1.4455821514129639, |
| "learning_rate": 8.951683145092748e-07, |
| "logits/chosen": -0.42824965715408325, |
| "logits/rejected": -0.4320424795150757, |
| "logps/chosen": -0.7893270254135132, |
| "logps/rejected": -0.8517144322395325, |
| "loss": 1.3652, |
| "rewards/accuracies": 0.5234375, |
| "rewards/chosen": -1.5786540508270264, |
| "rewards/margins": 0.12477481365203857, |
| "rewards/rejected": -1.703428864479065, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.29177403369672944, |
| "grad_norm": 0.6299450397491455, |
| "learning_rate": 8.928877138860706e-07, |
| "logits/chosen": -0.4388589560985565, |
| "logits/rejected": -0.40156903862953186, |
| "logps/chosen": -0.7346572875976562, |
| "logps/rejected": -0.8166492581367493, |
| "loss": 1.3134, |
| "rewards/accuracies": 0.5703125, |
| "rewards/chosen": -1.4693145751953125, |
| "rewards/margins": 0.16398391127586365, |
| "rewards/rejected": -1.6332985162734985, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.29388833828873473, |
| "grad_norm": 2.784437417984009, |
| "learning_rate": 8.905855440402224e-07, |
| "logits/chosen": -0.405662477016449, |
| "logits/rejected": -0.35549795627593994, |
| "logps/chosen": -0.7482771277427673, |
| "logps/rejected": -0.795568585395813, |
| "loss": 1.3656, |
| "rewards/accuracies": 0.578125, |
| "rewards/chosen": -1.4965542554855347, |
| "rewards/margins": 0.09458285570144653, |
| "rewards/rejected": -1.591137170791626, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.29600264288074, |
| "grad_norm": 0.4958692193031311, |
| "learning_rate": 8.882619313590212e-07, |
| "logits/chosen": -0.3814452886581421, |
| "logits/rejected": -0.35715553164482117, |
| "logps/chosen": -0.7731542587280273, |
| "logps/rejected": -0.8285202980041504, |
| "loss": 1.3776, |
| "rewards/accuracies": 0.515625, |
| "rewards/chosen": -1.5463085174560547, |
| "rewards/margins": 0.11073210835456848, |
| "rewards/rejected": -1.6570405960083008, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.2981169474727453, |
| "grad_norm": 0.4597362279891968, |
| "learning_rate": 8.859170034069532e-07, |
| "logits/chosen": -0.388383150100708, |
| "logits/rejected": -0.4071737229824066, |
| "logps/chosen": -0.7263504266738892, |
| "logps/rejected": -0.769676148891449, |
| "loss": 1.3712, |
| "rewards/accuracies": 0.53125, |
| "rewards/chosen": -1.4527008533477783, |
| "rewards/margins": 0.08665145933628082, |
| "rewards/rejected": -1.539352297782898, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.3002312520647506, |
| "grad_norm": 0.4914930760860443, |
| "learning_rate": 8.835508889186956e-07, |
| "logits/chosen": -0.41084378957748413, |
| "logits/rejected": -0.3823031187057495, |
| "logps/chosen": -0.7565821409225464, |
| "logps/rejected": -0.9084322452545166, |
| "loss": 1.2717, |
| "rewards/accuracies": 0.578125, |
| "rewards/chosen": -1.5131642818450928, |
| "rewards/margins": 0.3037002384662628, |
| "rewards/rejected": -1.8168644905090332, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.3023455566567559, |
| "grad_norm": 2.0075581073760986, |
| "learning_rate": 8.811637177920499e-07, |
| "logits/chosen": -0.4438302516937256, |
| "logits/rejected": -0.4916025698184967, |
| "logps/chosen": -0.800719141960144, |
| "logps/rejected": -0.8658267855644226, |
| "loss": 1.358, |
| "rewards/accuracies": 0.515625, |
| "rewards/chosen": -1.601438283920288, |
| "rewards/margins": 0.1302153617143631, |
| "rewards/rejected": -1.7316535711288452, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.3044598612487612, |
| "grad_norm": 1.1243022680282593, |
| "learning_rate": 8.7875562108081e-07, |
| "logits/chosen": -0.40519949793815613, |
| "logits/rejected": -0.3905750811100006, |
| "logps/chosen": -0.689585268497467, |
| "logps/rejected": -0.7312421798706055, |
| "loss": 1.3503, |
| "rewards/accuracies": 0.5703125, |
| "rewards/chosen": -1.379170536994934, |
| "rewards/margins": 0.08331384509801865, |
| "rewards/rejected": -1.462484359741211, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.3065741658407664, |
| "grad_norm": 0.7543137669563293, |
| "learning_rate": 8.76326730987568e-07, |
| "logits/chosen": -0.4696752727031708, |
| "logits/rejected": -0.4357326626777649, |
| "logps/chosen": -0.7813425660133362, |
| "logps/rejected": -0.8276973962783813, |
| "loss": 1.3794, |
| "rewards/accuracies": 0.5234375, |
| "rewards/chosen": -1.5626851320266724, |
| "rewards/margins": 0.09270970523357391, |
| "rewards/rejected": -1.6553947925567627, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.3086884704327717, |
| "grad_norm": 1.3136053085327148, |
| "learning_rate": 8.738771808564555e-07, |
| "logits/chosen": -0.4262731075286865, |
| "logits/rejected": -0.44038820266723633, |
| "logps/chosen": -0.697494387626648, |
| "logps/rejected": -0.8369535803794861, |
| "loss": 1.2699, |
| "rewards/accuracies": 0.609375, |
| "rewards/chosen": -1.394988775253296, |
| "rewards/margins": 0.2789183557033539, |
| "rewards/rejected": -1.6739071607589722, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.310802775024777, |
| "grad_norm": 2.221562385559082, |
| "learning_rate": 8.714071051658245e-07, |
| "logits/chosen": -0.40089336037635803, |
| "logits/rejected": -0.37991875410079956, |
| "logps/chosen": -0.7704445123672485, |
| "logps/rejected": -0.859091579914093, |
| "loss": 1.2987, |
| "rewards/accuracies": 0.5859375, |
| "rewards/chosen": -1.540889024734497, |
| "rewards/margins": 0.17729414999485016, |
| "rewards/rejected": -1.718183159828186, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.3129170796167823, |
| "grad_norm": 1.5049912929534912, |
| "learning_rate": 8.689166395208636e-07, |
| "logits/chosen": -0.38984015583992004, |
| "logits/rejected": -0.35900723934173584, |
| "logps/chosen": -0.6424779891967773, |
| "logps/rejected": -0.7145389318466187, |
| "loss": 1.3261, |
| "rewards/accuracies": 0.5703125, |
| "rewards/chosen": -1.2849559783935547, |
| "rewards/margins": 0.14412200450897217, |
| "rewards/rejected": -1.4290778636932373, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.31503138420878757, |
| "grad_norm": 0.36125388741493225, |
| "learning_rate": 8.664059206461534e-07, |
| "logits/chosen": -0.3490441143512726, |
| "logits/rejected": -0.3219914436340332, |
| "logps/chosen": -0.7200264930725098, |
| "logps/rejected": -0.7924249768257141, |
| "loss": 1.3476, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -1.4400529861450195, |
| "rewards/margins": 0.1447969526052475, |
| "rewards/rejected": -1.5848499536514282, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.31714568880079286, |
| "grad_norm": 1.039840579032898, |
| "learning_rate": 8.638750863781612e-07, |
| "logits/chosen": -0.40701645612716675, |
| "logits/rejected": -0.406186580657959, |
| "logps/chosen": -0.7083575129508972, |
| "logps/rejected": -0.7766748070716858, |
| "loss": 1.3263, |
| "rewards/accuracies": 0.5390625, |
| "rewards/chosen": -1.4167150259017944, |
| "rewards/margins": 0.1366347074508667, |
| "rewards/rejected": -1.5533496141433716, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.31925999339279815, |
| "grad_norm": 0.7128564119338989, |
| "learning_rate": 8.613242756576728e-07, |
| "logits/chosen": -0.40932926535606384, |
| "logits/rejected": -0.4234562814235687, |
| "logps/chosen": -0.6775843501091003, |
| "logps/rejected": -0.7866222858428955, |
| "loss": 1.2834, |
| "rewards/accuracies": 0.6015625, |
| "rewards/chosen": -1.3551687002182007, |
| "rewards/margins": 0.2180757373571396, |
| "rewards/rejected": -1.573244571685791, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.32137429798480344, |
| "grad_norm": 1.1701059341430664, |
| "learning_rate": 8.587536285221655e-07, |
| "logits/chosen": -0.3654797077178955, |
| "logits/rejected": -0.3181680738925934, |
| "logps/chosen": -0.6686022877693176, |
| "logps/rejected": -0.7058504223823547, |
| "loss": 1.3612, |
| "rewards/accuracies": 0.546875, |
| "rewards/chosen": -1.3372045755386353, |
| "rewards/margins": 0.07449636608362198, |
| "rewards/rejected": -1.4117008447647095, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.3234886025768087, |
| "grad_norm": 0.8239700794219971, |
| "learning_rate": 8.561632860981204e-07, |
| "logits/chosen": -0.42527130246162415, |
| "logits/rejected": -0.4091627299785614, |
| "logps/chosen": -0.6969794631004333, |
| "logps/rejected": -0.8019355535507202, |
| "loss": 1.2974, |
| "rewards/accuracies": 0.578125, |
| "rewards/chosen": -1.3939589262008667, |
| "rewards/margins": 0.20991206169128418, |
| "rewards/rejected": -1.6038711071014404, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.325602907168814, |
| "grad_norm": 1.4885636568069458, |
| "learning_rate": 8.535533905932737e-07, |
| "logits/chosen": -0.4126192331314087, |
| "logits/rejected": -0.41548141837120056, |
| "logps/chosen": -0.7076549530029297, |
| "logps/rejected": -0.7940821051597595, |
| "loss": 1.3198, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -1.4153099060058594, |
| "rewards/margins": 0.17285437881946564, |
| "rewards/rejected": -1.588164210319519, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.3277172117608193, |
| "grad_norm": 1.439434289932251, |
| "learning_rate": 8.509240852888106e-07, |
| "logits/chosen": -0.3763914704322815, |
| "logits/rejected": -0.3617165684700012, |
| "logps/chosen": -0.7189474105834961, |
| "logps/rejected": -0.827629804611206, |
| "loss": 1.2816, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -1.4378948211669922, |
| "rewards/margins": 0.2173648476600647, |
| "rewards/rejected": -1.655259609222412, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.3298315163528246, |
| "grad_norm": 1.4505418539047241, |
| "learning_rate": 8.482755145314985e-07, |
| "logits/chosen": -0.37879478931427, |
| "logits/rejected": -0.38689684867858887, |
| "logps/chosen": -0.7011865973472595, |
| "logps/rejected": -0.8019431829452515, |
| "loss": 1.3158, |
| "rewards/accuracies": 0.5546875, |
| "rewards/chosen": -1.402373194694519, |
| "rewards/margins": 0.2015131413936615, |
| "rewards/rejected": -1.603886365890503, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.3319458209448299, |
| "grad_norm": 2.0968713760375977, |
| "learning_rate": 8.45607823725763e-07, |
| "logits/chosen": -0.4366365075111389, |
| "logits/rejected": -0.41210681200027466, |
| "logps/chosen": -0.6455651521682739, |
| "logps/rejected": -0.7228428721427917, |
| "loss": 1.3247, |
| "rewards/accuracies": 0.5546875, |
| "rewards/chosen": -1.2911303043365479, |
| "rewards/margins": 0.1545555144548416, |
| "rewards/rejected": -1.4456857442855835, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.3340601255368352, |
| "grad_norm": 0.6716106534004211, |
| "learning_rate": 8.429211593257052e-07, |
| "logits/chosen": -0.42992207407951355, |
| "logits/rejected": -0.4105672836303711, |
| "logps/chosen": -0.6981461048126221, |
| "logps/rejected": -0.7909567952156067, |
| "loss": 1.3128, |
| "rewards/accuracies": 0.5859375, |
| "rewards/chosen": -1.3962922096252441, |
| "rewards/margins": 0.1856214702129364, |
| "rewards/rejected": -1.5819135904312134, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.33617443012884046, |
| "grad_norm": 2.4430501461029053, |
| "learning_rate": 8.402156688270612e-07, |
| "logits/chosen": -0.4184916317462921, |
| "logits/rejected": -0.3943992257118225, |
| "logps/chosen": -0.6568948030471802, |
| "logps/rejected": -0.7506390810012817, |
| "loss": 1.2992, |
| "rewards/accuracies": 0.5859375, |
| "rewards/chosen": -1.3137896060943604, |
| "rewards/margins": 0.18748846650123596, |
| "rewards/rejected": -1.5012781620025635, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.3382887347208457, |
| "grad_norm": 2.0322091579437256, |
| "learning_rate": 8.374915007591052e-07, |
| "logits/chosen": -0.4713057577610016, |
| "logits/rejected": -0.42163771390914917, |
| "logps/chosen": -0.7347853779792786, |
| "logps/rejected": -0.7770044207572937, |
| "loss": 1.3801, |
| "rewards/accuracies": 0.5078125, |
| "rewards/chosen": -1.4695707559585571, |
| "rewards/margins": 0.0844380110502243, |
| "rewards/rejected": -1.5540088415145874, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.340403039312851, |
| "grad_norm": 0.4045500159263611, |
| "learning_rate": 8.347488046764948e-07, |
| "logits/chosen": -0.39465126395225525, |
| "logits/rejected": -0.3961923122406006, |
| "logps/chosen": -0.601732075214386, |
| "logps/rejected": -0.694148600101471, |
| "loss": 1.2859, |
| "rewards/accuracies": 0.6328125, |
| "rewards/chosen": -1.203464150428772, |
| "rewards/margins": 0.18483319878578186, |
| "rewards/rejected": -1.388297200202942, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.3425173439048563, |
| "grad_norm": 2.79396915435791, |
| "learning_rate": 8.319877311510612e-07, |
| "logits/chosen": -0.4311378002166748, |
| "logits/rejected": -0.4248836636543274, |
| "logps/chosen": -0.6813413500785828, |
| "logps/rejected": -0.775830864906311, |
| "loss": 1.3001, |
| "rewards/accuracies": 0.578125, |
| "rewards/chosen": -1.3626827001571655, |
| "rewards/margins": 0.18897925317287445, |
| "rewards/rejected": -1.551661729812622, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.34463164849686156, |
| "grad_norm": 0.714146077632904, |
| "learning_rate": 8.292084317635419e-07, |
| "logits/chosen": -0.4060715436935425, |
| "logits/rejected": -0.3770482540130615, |
| "logps/chosen": -0.7176523208618164, |
| "logps/rejected": -0.7973593473434448, |
| "loss": 1.324, |
| "rewards/accuracies": 0.5859375, |
| "rewards/chosen": -1.4353046417236328, |
| "rewards/margins": 0.15941408276557922, |
| "rewards/rejected": -1.5947186946868896, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.34674595308886685, |
| "grad_norm": 1.6007037162780762, |
| "learning_rate": 8.264110590952607e-07, |
| "logits/chosen": -0.49063974618911743, |
| "logits/rejected": -0.5119628310203552, |
| "logps/chosen": -0.7263911366462708, |
| "logps/rejected": -0.9138184785842896, |
| "loss": 1.2439, |
| "rewards/accuracies": 0.6484375, |
| "rewards/chosen": -1.4527822732925415, |
| "rewards/margins": 0.3748546540737152, |
| "rewards/rejected": -1.827636957168579, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.34886025768087214, |
| "grad_norm": 1.4566830396652222, |
| "learning_rate": 8.235957667197494e-07, |
| "logits/chosen": -0.4681779146194458, |
| "logits/rejected": -0.46475380659103394, |
| "logps/chosen": -0.6923782229423523, |
| "logps/rejected": -0.7901281118392944, |
| "loss": 1.295, |
| "rewards/accuracies": 0.5703125, |
| "rewards/chosen": -1.3847564458847046, |
| "rewards/margins": 0.19549959897994995, |
| "rewards/rejected": -1.5802562236785889, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.35097456227287743, |
| "grad_norm": 3.0825328826904297, |
| "learning_rate": 8.207627091943177e-07, |
| "logits/chosen": -0.4294862151145935, |
| "logits/rejected": -0.42411237955093384, |
| "logps/chosen": -0.6851246356964111, |
| "logps/rejected": -0.7844961881637573, |
| "loss": 1.2871, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -1.3702492713928223, |
| "rewards/margins": 0.19874317944049835, |
| "rewards/rejected": -1.5689923763275146, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.3530888668648827, |
| "grad_norm": 1.0783339738845825, |
| "learning_rate": 8.179120420515675e-07, |
| "logits/chosen": -0.4528030455112457, |
| "logits/rejected": -0.4626815617084503, |
| "logps/chosen": -0.703376293182373, |
| "logps/rejected": -0.8752757906913757, |
| "loss": 1.2193, |
| "rewards/accuracies": 0.703125, |
| "rewards/chosen": -1.406752586364746, |
| "rewards/margins": 0.34379899501800537, |
| "rewards/rejected": -1.7505515813827515, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.355203171456888, |
| "grad_norm": 2.6788036823272705, |
| "learning_rate": 8.150439217908556e-07, |
| "logits/chosen": -0.44946759939193726, |
| "logits/rejected": -0.47430264949798584, |
| "logps/chosen": -0.751136839389801, |
| "logps/rejected": -0.874577522277832, |
| "loss": 1.29, |
| "rewards/accuracies": 0.578125, |
| "rewards/chosen": -1.502273678779602, |
| "rewards/margins": 0.24688144028186798, |
| "rewards/rejected": -1.749155044555664, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.3573174760488933, |
| "grad_norm": 0.9087730050086975, |
| "learning_rate": 8.121585058696999e-07, |
| "logits/chosen": -0.47294262051582336, |
| "logits/rejected": -0.46765226125717163, |
| "logps/chosen": -0.7291173934936523, |
| "logps/rejected": -0.7999277114868164, |
| "loss": 1.3482, |
| "rewards/accuracies": 0.6015625, |
| "rewards/chosen": -1.4582347869873047, |
| "rewards/margins": 0.1416206806898117, |
| "rewards/rejected": -1.5998554229736328, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.3594317806408986, |
| "grad_norm": 3.392674207687378, |
| "learning_rate": 8.092559526951374e-07, |
| "logits/chosen": -0.5026620626449585, |
| "logits/rejected": -0.46620574593544006, |
| "logps/chosen": -0.746992290019989, |
| "logps/rejected": -0.8266301155090332, |
| "loss": 1.3202, |
| "rewards/accuracies": 0.6328125, |
| "rewards/chosen": -1.493984580039978, |
| "rewards/margins": 0.15927578508853912, |
| "rewards/rejected": -1.6532602310180664, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.3615460852329039, |
| "grad_norm": 1.27628755569458, |
| "learning_rate": 8.063364216150256e-07, |
| "logits/chosen": -0.5211395025253296, |
| "logits/rejected": -0.5419963598251343, |
| "logps/chosen": -0.7919114828109741, |
| "logps/rejected": -0.8731362223625183, |
| "loss": 1.3228, |
| "rewards/accuracies": 0.5703125, |
| "rewards/chosen": -1.5838229656219482, |
| "rewards/margins": 0.16244953870773315, |
| "rewards/rejected": -1.7462724447250366, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.36366038982490917, |
| "grad_norm": 0.8269656896591187, |
| "learning_rate": 8.034000729092967e-07, |
| "logits/chosen": -0.49545183777809143, |
| "logits/rejected": -0.4716613292694092, |
| "logps/chosen": -0.719520092010498, |
| "logps/rejected": -0.7876347303390503, |
| "loss": 1.3367, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -1.439040184020996, |
| "rewards/margins": 0.13622930645942688, |
| "rewards/rejected": -1.5752694606781006, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.36577469441691446, |
| "grad_norm": 0.6049383282661438, |
| "learning_rate": 8.004470677811559e-07, |
| "logits/chosen": -0.45276379585266113, |
| "logits/rejected": -0.42617955803871155, |
| "logps/chosen": -0.7097947597503662, |
| "logps/rejected": -0.7606989145278931, |
| "loss": 1.3909, |
| "rewards/accuracies": 0.5703125, |
| "rewards/chosen": -1.4195895195007324, |
| "rewards/margins": 0.10180822014808655, |
| "rewards/rejected": -1.5213978290557861, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.36788899900891975, |
| "grad_norm": 3.980013847351074, |
| "learning_rate": 7.974775683482337e-07, |
| "logits/chosen": -0.4783569574356079, |
| "logits/rejected": -0.43521156907081604, |
| "logps/chosen": -0.7623491287231445, |
| "logps/rejected": -0.8719285130500793, |
| "loss": 1.2838, |
| "rewards/accuracies": 0.5859375, |
| "rewards/chosen": -1.524698257446289, |
| "rewards/margins": 0.2191585898399353, |
| "rewards/rejected": -1.7438570261001587, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.370003303600925, |
| "grad_norm": 1.024530053138733, |
| "learning_rate": 7.94491737633684e-07, |
| "logits/chosen": -0.5009916424751282, |
| "logits/rejected": -0.48874592781066895, |
| "logps/chosen": -0.7552992701530457, |
| "logps/rejected": -0.8485872745513916, |
| "loss": 1.3153, |
| "rewards/accuracies": 0.6015625, |
| "rewards/chosen": -1.5105985403060913, |
| "rewards/margins": 0.18657605350017548, |
| "rewards/rejected": -1.6971745491027832, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.37211760819293027, |
| "grad_norm": 1.5952919721603394, |
| "learning_rate": 7.91489739557236e-07, |
| "logits/chosen": -0.4424138069152832, |
| "logits/rejected": -0.4334307312965393, |
| "logps/chosen": -0.6956002116203308, |
| "logps/rejected": -0.8018803000450134, |
| "loss": 1.3011, |
| "rewards/accuracies": 0.59375, |
| "rewards/chosen": -1.3912004232406616, |
| "rewards/margins": 0.21256020665168762, |
| "rewards/rejected": -1.6037606000900269, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.37423191278493556, |
| "grad_norm": 1.8331164121627808, |
| "learning_rate": 7.884717389261934e-07, |
| "logits/chosen": -0.4836267828941345, |
| "logits/rejected": -0.5018677115440369, |
| "logps/chosen": -0.7895969152450562, |
| "logps/rejected": -0.927432656288147, |
| "loss": 1.2467, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -1.5791938304901123, |
| "rewards/margins": 0.27567166090011597, |
| "rewards/rejected": -1.854865312576294, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.37634621737694085, |
| "grad_norm": 2.165984869003296, |
| "learning_rate": 7.854379014263876e-07, |
| "logits/chosen": -0.46125832200050354, |
| "logits/rejected": -0.39802712202072144, |
| "logps/chosen": -0.8382925391197205, |
| "logps/rejected": -0.9422982931137085, |
| "loss": 1.339, |
| "rewards/accuracies": 0.5546875, |
| "rewards/chosen": -1.676585078239441, |
| "rewards/margins": 0.20801125466823578, |
| "rewards/rejected": -1.884596586227417, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.37846052196894614, |
| "grad_norm": 0.522197425365448, |
| "learning_rate": 7.823883936130817e-07, |
| "logits/chosen": -0.4747823476791382, |
| "logits/rejected": -0.4888593554496765, |
| "logps/chosen": -0.723059892654419, |
| "logps/rejected": -0.84626305103302, |
| "loss": 1.2708, |
| "rewards/accuracies": 0.6171875, |
| "rewards/chosen": -1.446119785308838, |
| "rewards/margins": 0.24640652537345886, |
| "rewards/rejected": -1.69252610206604, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.38057482656095143, |
| "grad_norm": 1.9690748453140259, |
| "learning_rate": 7.793233829018262e-07, |
| "logits/chosen": -0.5430271625518799, |
| "logits/rejected": -0.5403288006782532, |
| "logps/chosen": -0.8244275450706482, |
| "logps/rejected": -0.9133931994438171, |
| "loss": 1.3306, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -1.6488550901412964, |
| "rewards/margins": 0.17793115973472595, |
| "rewards/rejected": -1.8267863988876343, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.3826891311529567, |
| "grad_norm": 2.9181363582611084, |
| "learning_rate": 7.762430375592688e-07, |
| "logits/chosen": -0.4843495786190033, |
| "logits/rejected": -0.47929176688194275, |
| "logps/chosen": -0.8097372055053711, |
| "logps/rejected": -0.8973760008811951, |
| "loss": 1.3283, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -1.6194744110107422, |
| "rewards/margins": 0.17527759075164795, |
| "rewards/rejected": -1.7947520017623901, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.384803435744962, |
| "grad_norm": 4.227083683013916, |
| "learning_rate": 7.731475266939158e-07, |
| "logits/chosen": -0.5047686696052551, |
| "logits/rejected": -0.4921850264072418, |
| "logps/chosen": -0.875984787940979, |
| "logps/rejected": -1.0406755208969116, |
| "loss": 1.3169, |
| "rewards/accuracies": 0.5703125, |
| "rewards/chosen": -1.751969575881958, |
| "rewards/margins": 0.32938146591186523, |
| "rewards/rejected": -2.0813510417938232, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.3869177403369673, |
| "grad_norm": 1.2871490716934204, |
| "learning_rate": 7.700370202468489e-07, |
| "logits/chosen": -0.5123783349990845, |
| "logits/rejected": -0.55179762840271, |
| "logps/chosen": -0.8869211077690125, |
| "logps/rejected": -1.1082773208618164, |
| "loss": 1.216, |
| "rewards/accuracies": 0.640625, |
| "rewards/chosen": -1.773842215538025, |
| "rewards/margins": 0.4427123963832855, |
| "rewards/rejected": -2.216554641723633, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.3890320449289726, |
| "grad_norm": 1.3015679121017456, |
| "learning_rate": 7.669116889823954e-07, |
| "logits/chosen": -0.49182361364364624, |
| "logits/rejected": -0.5180585384368896, |
| "logps/chosen": -0.8816227912902832, |
| "logps/rejected": -0.9516821503639221, |
| "loss": 1.3449, |
| "rewards/accuracies": 0.5078125, |
| "rewards/chosen": -1.7632455825805664, |
| "rewards/margins": 0.14011862874031067, |
| "rewards/rejected": -1.9033643007278442, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.3911463495209779, |
| "grad_norm": 4.280956268310547, |
| "learning_rate": 7.637717044787526e-07, |
| "logits/chosen": -0.5702117681503296, |
| "logits/rejected": -0.5475804209709167, |
| "logps/chosen": -0.9307697415351868, |
| "logps/rejected": -1.0322346687316895, |
| "loss": 1.3434, |
| "rewards/accuracies": 0.5546875, |
| "rewards/chosen": -1.8615394830703735, |
| "rewards/margins": 0.20292985439300537, |
| "rewards/rejected": -2.064469337463379, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.39326065411298317, |
| "grad_norm": 1.3511455059051514, |
| "learning_rate": 7.606172391185699e-07, |
| "logits/chosen": -0.5466108322143555, |
| "logits/rejected": -0.551085352897644, |
| "logps/chosen": -1.0657893419265747, |
| "logps/rejected": -1.15786612033844, |
| "loss": 1.3549, |
| "rewards/accuracies": 0.4609375, |
| "rewards/chosen": -2.1315786838531494, |
| "rewards/margins": 0.18415334820747375, |
| "rewards/rejected": -2.31573224067688, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.39537495870498846, |
| "grad_norm": 0.7001176476478577, |
| "learning_rate": 7.574484660794836e-07, |
| "logits/chosen": -0.4849010407924652, |
| "logits/rejected": -0.5057946443557739, |
| "logps/chosen": -1.0784757137298584, |
| "logps/rejected": -1.2035218477249146, |
| "loss": 1.3556, |
| "rewards/accuracies": 0.4921875, |
| "rewards/chosen": -2.156951427459717, |
| "rewards/margins": 0.25009211897850037, |
| "rewards/rejected": -2.407043695449829, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.39748926329699374, |
| "grad_norm": 3.1405649185180664, |
| "learning_rate": 7.542655593246103e-07, |
| "logits/chosen": -0.5316596031188965, |
| "logits/rejected": -0.5658366680145264, |
| "logps/chosen": -1.0630009174346924, |
| "logps/rejected": -1.2867177724838257, |
| "loss": 1.2612, |
| "rewards/accuracies": 0.5703125, |
| "rewards/chosen": -2.1260018348693848, |
| "rewards/margins": 0.447433739900589, |
| "rewards/rejected": -2.5734355449676514, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.39960356788899903, |
| "grad_norm": 2.142986297607422, |
| "learning_rate": 7.510686935929962e-07, |
| "logits/chosen": -0.5959028005599976, |
| "logits/rejected": -0.5836039781570435, |
| "logps/chosen": -1.111003041267395, |
| "logps/rejected": -1.1858208179473877, |
| "loss": 1.3958, |
| "rewards/accuracies": 0.5390625, |
| "rewards/chosen": -2.22200608253479, |
| "rewards/margins": 0.149635449051857, |
| "rewards/rejected": -2.3716416358947754, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.40171787248100427, |
| "grad_norm": 1.9227335453033447, |
| "learning_rate": 7.478580443900246e-07, |
| "logits/chosen": -0.607532799243927, |
| "logits/rejected": -0.6102017760276794, |
| "logps/chosen": -1.3353261947631836, |
| "logps/rejected": -1.3975369930267334, |
| "loss": 1.457, |
| "rewards/accuracies": 0.5078125, |
| "rewards/chosen": -2.670652389526367, |
| "rewards/margins": 0.12442154437303543, |
| "rewards/rejected": -2.795073986053467, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.40383217707300956, |
| "grad_norm": 0.8509105443954468, |
| "learning_rate": 7.446337879777802e-07, |
| "logits/chosen": -0.5903070569038391, |
| "logits/rejected": -0.5728173851966858, |
| "logps/chosen": -1.27094566822052, |
| "logps/rejected": -1.3024815320968628, |
| "loss": 1.4953, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -2.54189133644104, |
| "rewards/margins": 0.06307169049978256, |
| "rewards/rejected": -2.6049630641937256, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.40594648166501485, |
| "grad_norm": 1.1561088562011719, |
| "learning_rate": 7.413961013653725e-07, |
| "logits/chosen": -0.5578102469444275, |
| "logits/rejected": -0.5907329320907593, |
| "logps/chosen": -1.3817013502120972, |
| "logps/rejected": -1.419295072555542, |
| "loss": 1.4865, |
| "rewards/accuracies": 0.515625, |
| "rewards/chosen": -2.7634027004241943, |
| "rewards/margins": 0.07518734782934189, |
| "rewards/rejected": -2.838590145111084, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.40806078625702014, |
| "grad_norm": 8.165387153625488, |
| "learning_rate": 7.381451622992183e-07, |
| "logits/chosen": -0.5213198661804199, |
| "logits/rejected": -0.5392848253250122, |
| "logps/chosen": -1.1798306703567505, |
| "logps/rejected": -1.2692899703979492, |
| "loss": 1.3971, |
| "rewards/accuracies": 0.53125, |
| "rewards/chosen": -2.359661340713501, |
| "rewards/margins": 0.17891867458820343, |
| "rewards/rejected": -2.5385799407958984, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.4101750908490254, |
| "grad_norm": 1.2850884199142456, |
| "learning_rate": 7.348811492532839e-07, |
| "logits/chosen": -0.5382787585258484, |
| "logits/rejected": -0.5274642705917358, |
| "logps/chosen": -1.242587685585022, |
| "logps/rejected": -1.272438645362854, |
| "loss": 1.4795, |
| "rewards/accuracies": 0.4921875, |
| "rewards/chosen": -2.485175371170044, |
| "rewards/margins": 0.05970197170972824, |
| "rewards/rejected": -2.544877290725708, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.4122893954410307, |
| "grad_norm": 4.910929203033447, |
| "learning_rate": 7.316042414192864e-07, |
| "logits/chosen": -0.6186666488647461, |
| "logits/rejected": -0.6255884170532227, |
| "logps/chosen": -1.1743704080581665, |
| "logps/rejected": -1.2720146179199219, |
| "loss": 1.4127, |
| "rewards/accuracies": 0.546875, |
| "rewards/chosen": -2.348740816116333, |
| "rewards/margins": 0.19528816640377045, |
| "rewards/rejected": -2.5440292358398438, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.414403700033036, |
| "grad_norm": 4.270901203155518, |
| "learning_rate": 7.283146186968565e-07, |
| "logits/chosen": -0.5861366987228394, |
| "logits/rejected": -0.6005197763442993, |
| "logps/chosen": -1.2127022743225098, |
| "logps/rejected": -1.3036490678787231, |
| "loss": 1.4067, |
| "rewards/accuracies": 0.546875, |
| "rewards/chosen": -2.4254045486450195, |
| "rewards/margins": 0.18189355731010437, |
| "rewards/rejected": -2.6072981357574463, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.4165180046250413, |
| "grad_norm": 0.3070116639137268, |
| "learning_rate": 7.250124616836622e-07, |
| "logits/chosen": -0.6026022434234619, |
| "logits/rejected": -0.5920048952102661, |
| "logps/chosen": -1.0706496238708496, |
| "logps/rejected": -1.2879594564437866, |
| "loss": 1.2465, |
| "rewards/accuracies": 0.6484375, |
| "rewards/chosen": -2.141299247741699, |
| "rewards/margins": 0.4346192479133606, |
| "rewards/rejected": -2.5759189128875732, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.4186323092170466, |
| "grad_norm": 1.160252571105957, |
| "learning_rate": 7.216979516654943e-07, |
| "logits/chosen": -0.5808722376823425, |
| "logits/rejected": -0.5770124197006226, |
| "logps/chosen": -1.0426011085510254, |
| "logps/rejected": -1.1295092105865479, |
| "loss": 1.4244, |
| "rewards/accuracies": 0.53125, |
| "rewards/chosen": -2.085202217102051, |
| "rewards/margins": 0.1738162338733673, |
| "rewards/rejected": -2.2590184211730957, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.4207466138090519, |
| "grad_norm": 4.6966471672058105, |
| "learning_rate": 7.183712706063132e-07, |
| "logits/chosen": -0.5958350896835327, |
| "logits/rejected": -0.6440161466598511, |
| "logps/chosen": -0.981076717376709, |
| "logps/rejected": -1.1257147789001465, |
| "loss": 1.3175, |
| "rewards/accuracies": 0.6015625, |
| "rewards/chosen": -1.962153434753418, |
| "rewards/margins": 0.28927627205848694, |
| "rewards/rejected": -2.251429557800293, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.42286091840105716, |
| "grad_norm": 2.9395248889923096, |
| "learning_rate": 7.150326011382603e-07, |
| "logits/chosen": -0.5647889375686646, |
| "logits/rejected": -0.5762943625450134, |
| "logps/chosen": -0.8101261854171753, |
| "logps/rejected": -1.0001438856124878, |
| "loss": 1.2135, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -1.6202523708343506, |
| "rewards/margins": 0.38003528118133545, |
| "rewards/rejected": -2.0002877712249756, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.42497522299306245, |
| "grad_norm": 1.2575147151947021, |
| "learning_rate": 7.116821265516306e-07, |
| "logits/chosen": -0.5834293961524963, |
| "logits/rejected": -0.5929508805274963, |
| "logps/chosen": -0.8768399953842163, |
| "logps/rejected": -1.0942046642303467, |
| "loss": 1.219, |
| "rewards/accuracies": 0.6171875, |
| "rewards/chosen": -1.7536799907684326, |
| "rewards/margins": 0.43472927808761597, |
| "rewards/rejected": -2.1884093284606934, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.42708952758506774, |
| "grad_norm": 1.4035751819610596, |
| "learning_rate": 7.083200307848115e-07, |
| "logits/chosen": -0.5424078106880188, |
| "logits/rejected": -0.5316082239151001, |
| "logps/chosen": -0.8791903257369995, |
| "logps/rejected": -0.9323580265045166, |
| "loss": 1.3675, |
| "rewards/accuracies": 0.546875, |
| "rewards/chosen": -1.758380651473999, |
| "rewards/margins": 0.10633517056703568, |
| "rewards/rejected": -1.8647160530090332, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.42920383217707303, |
| "grad_norm": 1.8622503280639648, |
| "learning_rate": 7.049464984141829e-07, |
| "logits/chosen": -0.5329294204711914, |
| "logits/rejected": -0.5523126721382141, |
| "logps/chosen": -0.695776104927063, |
| "logps/rejected": -0.8400713801383972, |
| "loss": 1.2285, |
| "rewards/accuracies": 0.6328125, |
| "rewards/chosen": -1.391552209854126, |
| "rewards/margins": 0.28859058022499084, |
| "rewards/rejected": -1.6801427602767944, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.4313181367690783, |
| "grad_norm": 0.8603182435035706, |
| "learning_rate": 7.015617146439861e-07, |
| "logits/chosen": -0.4516752064228058, |
| "logits/rejected": -0.46907976269721985, |
| "logps/chosen": -0.6868133544921875, |
| "logps/rejected": -0.8646677732467651, |
| "loss": 1.2417, |
| "rewards/accuracies": 0.65625, |
| "rewards/chosen": -1.373626708984375, |
| "rewards/margins": 0.355709046125412, |
| "rewards/rejected": -1.7293355464935303, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.43343244136108355, |
| "grad_norm": 0.6437748670578003, |
| "learning_rate": 6.981658652961546e-07, |
| "logits/chosen": -0.6159051656723022, |
| "logits/rejected": -0.6000130772590637, |
| "logps/chosen": -0.7715178728103638, |
| "logps/rejected": -0.8714219331741333, |
| "loss": 1.3469, |
| "rewards/accuracies": 0.5703125, |
| "rewards/chosen": -1.5430357456207275, |
| "rewards/margins": 0.19980813562870026, |
| "rewards/rejected": -1.7428438663482666, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.43554674595308884, |
| "grad_norm": 1.2309322357177734, |
| "learning_rate": 6.947591368001137e-07, |
| "logits/chosen": -0.5913614630699158, |
| "logits/rejected": -0.6128537654876709, |
| "logps/chosen": -0.7512561678886414, |
| "logps/rejected": -0.8872793912887573, |
| "loss": 1.26, |
| "rewards/accuracies": 0.578125, |
| "rewards/chosen": -1.5025123357772827, |
| "rewards/margins": 0.2720465660095215, |
| "rewards/rejected": -1.7745587825775146, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.43766105054509413, |
| "grad_norm": 0.6153685450553894, |
| "learning_rate": 6.913417161825449e-07, |
| "logits/chosen": -0.5976595878601074, |
| "logits/rejected": -0.6222202181816101, |
| "logps/chosen": -0.837669849395752, |
| "logps/rejected": -0.9835771918296814, |
| "loss": 1.2986, |
| "rewards/accuracies": 0.5859375, |
| "rewards/chosen": -1.675339698791504, |
| "rewards/margins": 0.2918146252632141, |
| "rewards/rejected": -1.9671543836593628, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.4397753551370994, |
| "grad_norm": 1.9922760725021362, |
| "learning_rate": 6.87913791057119e-07, |
| "logits/chosen": -0.6808818578720093, |
| "logits/rejected": -0.6692708730697632, |
| "logps/chosen": -0.7088961601257324, |
| "logps/rejected": -0.8256410360336304, |
| "loss": 1.281, |
| "rewards/accuracies": 0.6015625, |
| "rewards/chosen": -1.4177923202514648, |
| "rewards/margins": 0.23348984122276306, |
| "rewards/rejected": -1.6512820720672607, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.4418896597291047, |
| "grad_norm": 1.9562067985534668, |
| "learning_rate": 6.844755496141961e-07, |
| "logits/chosen": -0.5282632112503052, |
| "logits/rejected": -0.5692226886749268, |
| "logps/chosen": -0.7235382795333862, |
| "logps/rejected": -0.801092803478241, |
| "loss": 1.3227, |
| "rewards/accuracies": 0.53125, |
| "rewards/chosen": -1.4470765590667725, |
| "rewards/margins": 0.1551089584827423, |
| "rewards/rejected": -1.602185606956482, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.44400396432111, |
| "grad_norm": 0.8182584047317505, |
| "learning_rate": 6.81027180610493e-07, |
| "logits/chosen": -0.6418904662132263, |
| "logits/rejected": -0.5941328406333923, |
| "logps/chosen": -0.820648729801178, |
| "logps/rejected": -0.8864803910255432, |
| "loss": 1.3498, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -1.641297459602356, |
| "rewards/margins": 0.13166317343711853, |
| "rewards/rejected": -1.7729607820510864, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.4461182689131153, |
| "grad_norm": 3.075260877609253, |
| "learning_rate": 6.775688733587227e-07, |
| "logits/chosen": -0.5926809906959534, |
| "logits/rejected": -0.5844541788101196, |
| "logps/chosen": -0.7822425365447998, |
| "logps/rejected": -0.8866626024246216, |
| "loss": 1.2884, |
| "rewards/accuracies": 0.5859375, |
| "rewards/chosen": -1.5644850730895996, |
| "rewards/margins": 0.20884013175964355, |
| "rewards/rejected": -1.7733252048492432, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.4482325735051206, |
| "grad_norm": 0.8032744526863098, |
| "learning_rate": 6.741008177171993e-07, |
| "logits/chosen": -0.579971432685852, |
| "logits/rejected": -0.5978566408157349, |
| "logps/chosen": -0.721234917640686, |
| "logps/rejected": -0.8368514180183411, |
| "loss": 1.2781, |
| "rewards/accuracies": 0.6171875, |
| "rewards/chosen": -1.442469835281372, |
| "rewards/margins": 0.23123310506343842, |
| "rewards/rejected": -1.6737028360366821, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.45034687809712587, |
| "grad_norm": 0.6680911779403687, |
| "learning_rate": 6.706232040794161e-07, |
| "logits/chosen": -0.6748596429824829, |
| "logits/rejected": -0.6615546941757202, |
| "logps/chosen": -0.7931480407714844, |
| "logps/rejected": -0.8879257440567017, |
| "loss": 1.337, |
| "rewards/accuracies": 0.546875, |
| "rewards/chosen": -1.5862960815429688, |
| "rewards/margins": 0.1895553171634674, |
| "rewards/rejected": -1.7758514881134033, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.45246118268913116, |
| "grad_norm": 2.5107688903808594, |
| "learning_rate": 6.671362233635925e-07, |
| "logits/chosen": -0.6460363268852234, |
| "logits/rejected": -0.6273557543754578, |
| "logps/chosen": -0.823783814907074, |
| "logps/rejected": -0.87412428855896, |
| "loss": 1.3756, |
| "rewards/accuracies": 0.53125, |
| "rewards/chosen": -1.647567629814148, |
| "rewards/margins": 0.10068092495203018, |
| "rewards/rejected": -1.74824857711792, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.45457548728113645, |
| "grad_norm": 2.2206740379333496, |
| "learning_rate": 6.636400670021933e-07, |
| "logits/chosen": -0.6295229196548462, |
| "logits/rejected": -0.6330893039703369, |
| "logps/chosen": -0.807812511920929, |
| "logps/rejected": -0.9784457683563232, |
| "loss": 1.2259, |
| "rewards/accuracies": 0.6484375, |
| "rewards/chosen": -1.615625023841858, |
| "rewards/margins": 0.3412665128707886, |
| "rewards/rejected": -1.9568915367126465, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.45668979187314174, |
| "grad_norm": 1.2925803661346436, |
| "learning_rate": 6.601349269314187e-07, |
| "logits/chosen": -0.6001027822494507, |
| "logits/rejected": -0.6305864453315735, |
| "logps/chosen": -0.7216315865516663, |
| "logps/rejected": -0.8616191744804382, |
| "loss": 1.269, |
| "rewards/accuracies": 0.65625, |
| "rewards/chosen": -1.4432631731033325, |
| "rewards/margins": 0.2799749970436096, |
| "rewards/rejected": -1.7232383489608765, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.458804096465147, |
| "grad_norm": 4.863992214202881, |
| "learning_rate": 6.566209955806679e-07, |
| "logits/chosen": -0.5307935476303101, |
| "logits/rejected": -0.5385264754295349, |
| "logps/chosen": -0.8053566813468933, |
| "logps/rejected": -0.9241464734077454, |
| "loss": 1.3325, |
| "rewards/accuracies": 0.546875, |
| "rewards/chosen": -1.6107133626937866, |
| "rewards/margins": 0.23757943511009216, |
| "rewards/rejected": -1.8482929468154907, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.4609184010571523, |
| "grad_norm": 1.0189604759216309, |
| "learning_rate": 6.530984658619733e-07, |
| "logits/chosen": -0.7031885385513306, |
| "logits/rejected": -0.7072005867958069, |
| "logps/chosen": -0.8382629752159119, |
| "logps/rejected": -0.9468755722045898, |
| "loss": 1.3276, |
| "rewards/accuracies": 0.5546875, |
| "rewards/chosen": -1.6765259504318237, |
| "rewards/margins": 0.21722503006458282, |
| "rewards/rejected": -1.8937511444091797, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.4630327056491576, |
| "grad_norm": 1.1178699731826782, |
| "learning_rate": 6.495675311594122e-07, |
| "logits/chosen": -0.5736142992973328, |
| "logits/rejected": -0.5926069021224976, |
| "logps/chosen": -0.7676032781600952, |
| "logps/rejected": -0.9179919958114624, |
| "loss": 1.278, |
| "rewards/accuracies": 0.6015625, |
| "rewards/chosen": -1.5352065563201904, |
| "rewards/margins": 0.3007773756980896, |
| "rewards/rejected": -1.8359839916229248, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.4651470102411629, |
| "grad_norm": 2.4985287189483643, |
| "learning_rate": 6.460283853184879e-07, |
| "logits/chosen": -0.6372602581977844, |
| "logits/rejected": -0.6313104033470154, |
| "logps/chosen": -0.8754556179046631, |
| "logps/rejected": -0.9803894758224487, |
| "loss": 1.3166, |
| "rewards/accuracies": 0.6328125, |
| "rewards/chosen": -1.7509112358093262, |
| "rewards/margins": 0.2098677009344101, |
| "rewards/rejected": -1.9607789516448975, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.46726131483316813, |
| "grad_norm": 1.5675435066223145, |
| "learning_rate": 6.424812226354889e-07, |
| "logits/chosen": -0.6377983093261719, |
| "logits/rejected": -0.6666730642318726, |
| "logps/chosen": -0.7556843757629395, |
| "logps/rejected": -0.9096466302871704, |
| "loss": 1.2397, |
| "rewards/accuracies": 0.6796875, |
| "rewards/chosen": -1.511368751525879, |
| "rewards/margins": 0.30792441964149475, |
| "rewards/rejected": -1.8192932605743408, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.4693756194251734, |
| "grad_norm": 2.853426218032837, |
| "learning_rate": 6.389262378468219e-07, |
| "logits/chosen": -0.6055567860603333, |
| "logits/rejected": -0.612144947052002, |
| "logps/chosen": -0.8588352203369141, |
| "logps/rejected": -0.8928595185279846, |
| "loss": 1.4022, |
| "rewards/accuracies": 0.5078125, |
| "rewards/chosen": -1.7176704406738281, |
| "rewards/margins": 0.06804870069026947, |
| "rewards/rejected": -1.7857190370559692, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.4714899240171787, |
| "grad_norm": 0.528042733669281, |
| "learning_rate": 6.353636261183213e-07, |
| "logits/chosen": -0.6543641090393066, |
| "logits/rejected": -0.6635830402374268, |
| "logps/chosen": -0.7858147621154785, |
| "logps/rejected": -0.9400445222854614, |
| "loss": 1.2446, |
| "rewards/accuracies": 0.640625, |
| "rewards/chosen": -1.571629524230957, |
| "rewards/margins": 0.3084595203399658, |
| "rewards/rejected": -1.8800890445709229, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.473604228609184, |
| "grad_norm": 1.1155768632888794, |
| "learning_rate": 6.317935830345338e-07, |
| "logits/chosen": -0.5700349807739258, |
| "logits/rejected": -0.6560614705085754, |
| "logps/chosen": -0.8426170945167542, |
| "logps/rejected": -0.9983471035957336, |
| "loss": 1.3204, |
| "rewards/accuracies": 0.578125, |
| "rewards/chosen": -1.6852341890335083, |
| "rewards/margins": 0.3114599883556366, |
| "rewards/rejected": -1.9966942071914673, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.4757185332011893, |
| "grad_norm": 0.802669107913971, |
| "learning_rate": 6.282163045879823e-07, |
| "logits/chosen": -0.6912901401519775, |
| "logits/rejected": -0.7201069593429565, |
| "logps/chosen": -0.8135342597961426, |
| "logps/rejected": -0.9537283182144165, |
| "loss": 1.2961, |
| "rewards/accuracies": 0.5859375, |
| "rewards/chosen": -1.6270685195922852, |
| "rewards/margins": 0.2803882658481598, |
| "rewards/rejected": -1.907456636428833, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.4778328377931946, |
| "grad_norm": 1.709757924079895, |
| "learning_rate": 6.246319871684047e-07, |
| "logits/chosen": -0.7573816776275635, |
| "logits/rejected": -0.8028420209884644, |
| "logps/chosen": -0.891952633857727, |
| "logps/rejected": -1.0168029069900513, |
| "loss": 1.333, |
| "rewards/accuracies": 0.6015625, |
| "rewards/chosen": -1.783905267715454, |
| "rewards/margins": 0.24970072507858276, |
| "rewards/rejected": -2.0336058139801025, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.47994714238519987, |
| "grad_norm": 2.170957326889038, |
| "learning_rate": 6.210408275519734e-07, |
| "logits/chosen": -0.6915597915649414, |
| "logits/rejected": -0.7027997970581055, |
| "logps/chosen": -0.9063036441802979, |
| "logps/rejected": -1.0104373693466187, |
| "loss": 1.3388, |
| "rewards/accuracies": 0.5390625, |
| "rewards/chosen": -1.8126072883605957, |
| "rewards/margins": 0.20826762914657593, |
| "rewards/rejected": -2.0208747386932373, |
| "step": 227 |
| }, |
| { |
| "epoch": 0.48206144697720515, |
| "grad_norm": 1.8802261352539062, |
| "learning_rate": 6.174430228904919e-07, |
| "logits/chosen": -0.689726710319519, |
| "logits/rejected": -0.7143282890319824, |
| "logps/chosen": -0.7480812072753906, |
| "logps/rejected": -0.8698041439056396, |
| "loss": 1.2836, |
| "rewards/accuracies": 0.6171875, |
| "rewards/chosen": -1.4961624145507812, |
| "rewards/margins": 0.24344584345817566, |
| "rewards/rejected": -1.7396082878112793, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.48417575156921044, |
| "grad_norm": 2.5202934741973877, |
| "learning_rate": 6.13838770700571e-07, |
| "logits/chosen": -0.6858299374580383, |
| "logits/rejected": -0.7115206122398376, |
| "logps/chosen": -0.8575515151023865, |
| "logps/rejected": -0.9657347202301025, |
| "loss": 1.3046, |
| "rewards/accuracies": 0.546875, |
| "rewards/chosen": -1.715103030204773, |
| "rewards/margins": 0.21636635065078735, |
| "rewards/rejected": -1.931469440460205, |
| "step": 229 |
| }, |
| { |
| "epoch": 0.48629005616121573, |
| "grad_norm": 1.268512487411499, |
| "learning_rate": 6.102282688527859e-07, |
| "logits/chosen": -0.7078689932823181, |
| "logits/rejected": -0.7254161238670349, |
| "logps/chosen": -0.8850880861282349, |
| "logps/rejected": -1.031385898590088, |
| "loss": 1.2816, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -1.7701761722564697, |
| "rewards/margins": 0.29259535670280457, |
| "rewards/rejected": -2.062771797180176, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.488404360753221, |
| "grad_norm": 1.7285584211349487, |
| "learning_rate": 6.066117155608135e-07, |
| "logits/chosen": -0.7325868606567383, |
| "logits/rejected": -0.7433226108551025, |
| "logps/chosen": -0.8014956116676331, |
| "logps/rejected": -0.9653260111808777, |
| "loss": 1.2429, |
| "rewards/accuracies": 0.6328125, |
| "rewards/chosen": -1.6029912233352661, |
| "rewards/margins": 0.32766085863113403, |
| "rewards/rejected": -1.9306520223617554, |
| "step": 231 |
| }, |
| { |
| "epoch": 0.4905186653452263, |
| "grad_norm": 0.6270304322242737, |
| "learning_rate": 6.029893093705491e-07, |
| "logits/chosen": -0.692166805267334, |
| "logits/rejected": -0.6799293756484985, |
| "logps/chosen": -0.7850213646888733, |
| "logps/rejected": -0.8839574456214905, |
| "loss": 1.2967, |
| "rewards/accuracies": 0.640625, |
| "rewards/chosen": -1.5700427293777466, |
| "rewards/margins": 0.19787229597568512, |
| "rewards/rejected": -1.767914891242981, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.4926329699372316, |
| "grad_norm": 1.0160484313964844, |
| "learning_rate": 5.993612491492087e-07, |
| "logits/chosen": -0.7095844149589539, |
| "logits/rejected": -0.71524578332901, |
| "logps/chosen": -0.7063854336738586, |
| "logps/rejected": -0.8855549097061157, |
| "loss": 1.2176, |
| "rewards/accuracies": 0.6328125, |
| "rewards/chosen": -1.4127708673477173, |
| "rewards/margins": 0.3583390712738037, |
| "rewards/rejected": -1.7711098194122314, |
| "step": 233 |
| }, |
| { |
| "epoch": 0.4947472745292369, |
| "grad_norm": 2.225841999053955, |
| "learning_rate": 5.957277340744094e-07, |
| "logits/chosen": -0.7488946318626404, |
| "logits/rejected": -0.7588428854942322, |
| "logps/chosen": -0.9203822612762451, |
| "logps/rejected": -1.0089298486709595, |
| "loss": 1.355, |
| "rewards/accuracies": 0.5390625, |
| "rewards/chosen": -1.8407645225524902, |
| "rewards/margins": 0.17709502577781677, |
| "rewards/rejected": -2.017859697341919, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.4968615791212422, |
| "grad_norm": 1.9577795267105103, |
| "learning_rate": 5.920889636232351e-07, |
| "logits/chosen": -0.8078997731208801, |
| "logits/rejected": -0.8064825534820557, |
| "logps/chosen": -0.8004480004310608, |
| "logps/rejected": -0.9856831431388855, |
| "loss": 1.2273, |
| "rewards/accuracies": 0.6640625, |
| "rewards/chosen": -1.6008960008621216, |
| "rewards/margins": 0.3704703152179718, |
| "rewards/rejected": -1.971366286277771, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.4989758837132474, |
| "grad_norm": 2.5050246715545654, |
| "learning_rate": 5.884451375612865e-07, |
| "logits/chosen": -0.7499472498893738, |
| "logits/rejected": -0.7421904802322388, |
| "logps/chosen": -0.8363584876060486, |
| "logps/rejected": -0.9543781876564026, |
| "loss": 1.3002, |
| "rewards/accuracies": 0.5390625, |
| "rewards/chosen": -1.6727169752120972, |
| "rewards/margins": 0.23603934049606323, |
| "rewards/rejected": -1.9087563753128052, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.5010901883052528, |
| "grad_norm": 0.585436224937439, |
| "learning_rate": 5.847964559317128e-07, |
| "logits/chosen": -0.730015218257904, |
| "logits/rejected": -0.7154791355133057, |
| "logps/chosen": -0.8828849196434021, |
| "logps/rejected": -0.9897070527076721, |
| "loss": 1.347, |
| "rewards/accuracies": 0.578125, |
| "rewards/chosen": -1.7657698392868042, |
| "rewards/margins": 0.21364440023899078, |
| "rewards/rejected": -1.9794141054153442, |
| "step": 237 |
| }, |
| { |
| "epoch": 0.503204492897258, |
| "grad_norm": 0.9204092621803284, |
| "learning_rate": 5.8114311904423e-07, |
| "logits/chosen": -0.759974479675293, |
| "logits/rejected": -0.7793674468994141, |
| "logps/chosen": -0.8321584463119507, |
| "logps/rejected": -1.0809751749038696, |
| "loss": 1.2185, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -1.6643168926239014, |
| "rewards/margins": 0.4976334273815155, |
| "rewards/rejected": -2.1619503498077393, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.5053187974892633, |
| "grad_norm": 5.147011756896973, |
| "learning_rate": 5.774853274641243e-07, |
| "logits/chosen": -0.7148956060409546, |
| "logits/rejected": -0.7363921403884888, |
| "logps/chosen": -0.8623124361038208, |
| "logps/rejected": -1.0681498050689697, |
| "loss": 1.2353, |
| "rewards/accuracies": 0.6484375, |
| "rewards/chosen": -1.7246248722076416, |
| "rewards/margins": 0.4116746187210083, |
| "rewards/rejected": -2.1362996101379395, |
| "step": 239 |
| }, |
| { |
| "epoch": 0.5074331020812686, |
| "grad_norm": 1.9065529108047485, |
| "learning_rate": 5.738232820012407e-07, |
| "logits/chosen": -0.7158540487289429, |
| "logits/rejected": -0.7083900570869446, |
| "logps/chosen": -0.981558620929718, |
| "logps/rejected": -1.054612636566162, |
| "loss": 1.3594, |
| "rewards/accuracies": 0.59375, |
| "rewards/chosen": -1.963117241859436, |
| "rewards/margins": 0.14610806107521057, |
| "rewards/rejected": -2.109225273132324, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.5095474066732739, |
| "grad_norm": 2.4411256313323975, |
| "learning_rate": 5.701571836989591e-07, |
| "logits/chosen": -0.8441444039344788, |
| "logits/rejected": -0.8529233336448669, |
| "logps/chosen": -0.8665949702262878, |
| "logps/rejected": -1.030572772026062, |
| "loss": 1.2477, |
| "rewards/accuracies": 0.671875, |
| "rewards/chosen": -1.7331899404525757, |
| "rewards/margins": 0.3279556334018707, |
| "rewards/rejected": -2.061145544052124, |
| "step": 241 |
| }, |
| { |
| "epoch": 0.5116617112652792, |
| "grad_norm": 2.461113214492798, |
| "learning_rate": 5.664872338231571e-07, |
| "logits/chosen": -0.7463312149047852, |
| "logits/rejected": -0.7725105285644531, |
| "logps/chosen": -0.9185941815376282, |
| "logps/rejected": -1.1244423389434814, |
| "loss": 1.2404, |
| "rewards/accuracies": 0.6328125, |
| "rewards/chosen": -1.8371883630752563, |
| "rewards/margins": 0.411696195602417, |
| "rewards/rejected": -2.248884677886963, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.5137760158572844, |
| "grad_norm": 3.5861761569976807, |
| "learning_rate": 5.628136338511607e-07, |
| "logits/chosen": -0.8432914018630981, |
| "logits/rejected": -0.85801100730896, |
| "logps/chosen": -0.8873915672302246, |
| "logps/rejected": -1.0090795755386353, |
| "loss": 1.3072, |
| "rewards/accuracies": 0.609375, |
| "rewards/chosen": -1.7747831344604492, |
| "rewards/margins": 0.24337637424468994, |
| "rewards/rejected": -2.0181591510772705, |
| "step": 243 |
| }, |
| { |
| "epoch": 0.5158903204492897, |
| "grad_norm": 2.109071969985962, |
| "learning_rate": 5.591365854606829e-07, |
| "logits/chosen": -0.7899532318115234, |
| "logits/rejected": -0.7548331618309021, |
| "logps/chosen": -0.9333330392837524, |
| "logps/rejected": -1.00949227809906, |
| "loss": 1.3749, |
| "rewards/accuracies": 0.59375, |
| "rewards/chosen": -1.8666660785675049, |
| "rewards/margins": 0.1523183286190033, |
| "rewards/rejected": -2.01898455619812, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.518004625041295, |
| "grad_norm": 2.2017955780029297, |
| "learning_rate": 5.554562905187527e-07, |
| "logits/chosen": -0.7569047212600708, |
| "logits/rejected": -0.7679808735847473, |
| "logps/chosen": -0.9779613614082336, |
| "logps/rejected": -1.1713427305221558, |
| "loss": 1.2628, |
| "rewards/accuracies": 0.609375, |
| "rewards/chosen": -1.9559227228164673, |
| "rewards/margins": 0.3867628276348114, |
| "rewards/rejected": -2.3426854610443115, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.5201189296333003, |
| "grad_norm": 4.651991367340088, |
| "learning_rate": 5.517729510706315e-07, |
| "logits/chosen": -0.8546395301818848, |
| "logits/rejected": -0.8609369397163391, |
| "logps/chosen": -0.9926605224609375, |
| "logps/rejected": -1.1553713083267212, |
| "loss": 1.2812, |
| "rewards/accuracies": 0.6171875, |
| "rewards/chosen": -1.985321044921875, |
| "rewards/margins": 0.32542160153388977, |
| "rewards/rejected": -2.3107426166534424, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.5222332342253055, |
| "grad_norm": 2.6384060382843018, |
| "learning_rate": 5.480867693287223e-07, |
| "logits/chosen": -0.7734386324882507, |
| "logits/rejected": -0.7963250875473022, |
| "logps/chosen": -0.8996341824531555, |
| "logps/rejected": -1.0466523170471191, |
| "loss": 1.2849, |
| "rewards/accuracies": 0.578125, |
| "rewards/chosen": -1.799268364906311, |
| "rewards/margins": 0.2940361201763153, |
| "rewards/rejected": -2.0933046340942383, |
| "step": 247 |
| }, |
| { |
| "epoch": 0.5243475388173109, |
| "grad_norm": 1.3608977794647217, |
| "learning_rate": 5.443979476614674e-07, |
| "logits/chosen": -0.7350472807884216, |
| "logits/rejected": -0.7215992212295532, |
| "logps/chosen": -0.8887076377868652, |
| "logps/rejected": -1.0147045850753784, |
| "loss": 1.3182, |
| "rewards/accuracies": 0.6171875, |
| "rewards/chosen": -1.7774152755737305, |
| "rewards/margins": 0.25199398398399353, |
| "rewards/rejected": -2.029409170150757, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.5264618434093161, |
| "grad_norm": 3.017115354537964, |
| "learning_rate": 5.407066885822391e-07, |
| "logits/chosen": -0.827782154083252, |
| "logits/rejected": -0.8471929430961609, |
| "logps/chosen": -0.9262440800666809, |
| "logps/rejected": -1.1658306121826172, |
| "loss": 1.1882, |
| "rewards/accuracies": 0.6640625, |
| "rewards/chosen": -1.8524881601333618, |
| "rewards/margins": 0.47917306423187256, |
| "rewards/rejected": -2.3316612243652344, |
| "step": 249 |
| }, |
| { |
| "epoch": 0.5285761480013215, |
| "grad_norm": 0.7805312275886536, |
| "learning_rate": 5.370131947382214e-07, |
| "logits/chosen": -0.7815499305725098, |
| "logits/rejected": -0.8279274702072144, |
| "logps/chosen": -0.968708872795105, |
| "logps/rejected": -1.2697322368621826, |
| "loss": 1.2092, |
| "rewards/accuracies": 0.671875, |
| "rewards/chosen": -1.93741774559021, |
| "rewards/margins": 0.6020466685295105, |
| "rewards/rejected": -2.5394644737243652, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.5306904525933267, |
| "grad_norm": 2.229363441467285, |
| "learning_rate": 5.333176688992855e-07, |
| "logits/chosen": -0.7824153900146484, |
| "logits/rejected": -0.8154900074005127, |
| "logps/chosen": -1.0211957693099976, |
| "logps/rejected": -1.2145965099334717, |
| "loss": 1.3074, |
| "rewards/accuracies": 0.609375, |
| "rewards/chosen": -2.042391538619995, |
| "rewards/margins": 0.3868010938167572, |
| "rewards/rejected": -2.4291930198669434, |
| "step": 251 |
| }, |
| { |
| "epoch": 0.532804757185332, |
| "grad_norm": 1.1359837055206299, |
| "learning_rate": 5.296203139468571e-07, |
| "logits/chosen": -0.7467613220214844, |
| "logits/rejected": -0.7548531889915466, |
| "logps/chosen": -1.0614902973175049, |
| "logps/rejected": -1.2674376964569092, |
| "loss": 1.2512, |
| "rewards/accuracies": 0.578125, |
| "rewards/chosen": -2.1229805946350098, |
| "rewards/margins": 0.4118950664997101, |
| "rewards/rejected": -2.5348753929138184, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.5349190617773373, |
| "grad_norm": 3.0548548698425293, |
| "learning_rate": 5.259213328627792e-07, |
| "logits/chosen": -0.7868636250495911, |
| "logits/rejected": -0.8130850791931152, |
| "logps/chosen": -1.0743666887283325, |
| "logps/rejected": -1.2010191679000854, |
| "loss": 1.3275, |
| "rewards/accuracies": 0.578125, |
| "rewards/chosen": -2.148733377456665, |
| "rewards/margins": 0.2533051669597626, |
| "rewards/rejected": -2.402038335800171, |
| "step": 253 |
| }, |
| { |
| "epoch": 0.5370333663693426, |
| "grad_norm": 1.7205246686935425, |
| "learning_rate": 5.222209287181676e-07, |
| "logits/chosen": -0.81404709815979, |
| "logits/rejected": -0.8481613397598267, |
| "logps/chosen": -1.1599587202072144, |
| "logps/rejected": -1.4234716892242432, |
| "loss": 1.2894, |
| "rewards/accuracies": 0.6015625, |
| "rewards/chosen": -2.3199174404144287, |
| "rewards/margins": 0.5270256400108337, |
| "rewards/rejected": -2.8469433784484863, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.5391476709613479, |
| "grad_norm": 2.2516112327575684, |
| "learning_rate": 5.185193046622634e-07, |
| "logits/chosen": -0.8112510442733765, |
| "logits/rejected": -0.8310728073120117, |
| "logps/chosen": -1.1263186931610107, |
| "logps/rejected": -1.3256827592849731, |
| "loss": 1.3552, |
| "rewards/accuracies": 0.5390625, |
| "rewards/chosen": -2.2526373863220215, |
| "rewards/margins": 0.39872825145721436, |
| "rewards/rejected": -2.6513655185699463, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.5412619755533532, |
| "grad_norm": 2.8379359245300293, |
| "learning_rate": 5.148166639112799e-07, |
| "logits/chosen": -0.8202102184295654, |
| "logits/rejected": -0.845209002494812, |
| "logps/chosen": -1.264180302619934, |
| "logps/rejected": -1.6190590858459473, |
| "loss": 1.2083, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -2.528360605239868, |
| "rewards/margins": 0.7097575068473816, |
| "rewards/rejected": -3.2381181716918945, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.5433762801453584, |
| "grad_norm": 4.676355838775635, |
| "learning_rate": 5.111132097372459e-07, |
| "logits/chosen": -0.8866451978683472, |
| "logits/rejected": -0.8642281889915466, |
| "logps/chosen": -1.3194389343261719, |
| "logps/rejected": -1.4506916999816895, |
| "loss": 1.4002, |
| "rewards/accuracies": 0.59375, |
| "rewards/chosen": -2.6388778686523438, |
| "rewards/margins": 0.2625058591365814, |
| "rewards/rejected": -2.901383399963379, |
| "step": 257 |
| }, |
| { |
| "epoch": 0.5454905847373638, |
| "grad_norm": 2.55251407623291, |
| "learning_rate": 5.074091454568463e-07, |
| "logits/chosen": -0.7903708815574646, |
| "logits/rejected": -0.8010709881782532, |
| "logps/chosen": -1.3550961017608643, |
| "logps/rejected": -1.661428451538086, |
| "loss": 1.2131, |
| "rewards/accuracies": 0.65625, |
| "rewards/chosen": -2.7101922035217285, |
| "rewards/margins": 0.6126645803451538, |
| "rewards/rejected": -3.322856903076172, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.547604889329369, |
| "grad_norm": 4.116572856903076, |
| "learning_rate": 5.037046744202611e-07, |
| "logits/chosen": -0.7501232624053955, |
| "logits/rejected": -0.7825176119804382, |
| "logps/chosen": -1.2111856937408447, |
| "logps/rejected": -1.5176191329956055, |
| "loss": 1.1345, |
| "rewards/accuracies": 0.71875, |
| "rewards/chosen": -2.4223713874816895, |
| "rewards/margins": 0.6128667593002319, |
| "rewards/rejected": -3.035238265991211, |
| "step": 259 |
| }, |
| { |
| "epoch": 0.5497191939213742, |
| "grad_norm": 2.0285205841064453, |
| "learning_rate": 5e-07, |
| "logits/chosen": -0.8355445861816406, |
| "logits/rejected": -0.8497716784477234, |
| "logps/chosen": -1.1876304149627686, |
| "logps/rejected": -1.4788450002670288, |
| "loss": 1.1559, |
| "rewards/accuracies": 0.671875, |
| "rewards/chosen": -2.375260829925537, |
| "rewards/margins": 0.5824294090270996, |
| "rewards/rejected": -2.9576900005340576, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.5518334985133796, |
| "grad_norm": 4.681185245513916, |
| "learning_rate": 4.962953255797389e-07, |
| "logits/chosen": -0.8240503072738647, |
| "logits/rejected": -0.8016488552093506, |
| "logps/chosen": -1.2238959074020386, |
| "logps/rejected": -1.4727882146835327, |
| "loss": 1.2914, |
| "rewards/accuracies": 0.5546875, |
| "rewards/chosen": -2.447791814804077, |
| "rewards/margins": 0.49778467416763306, |
| "rewards/rejected": -2.9455764293670654, |
| "step": 261 |
| }, |
| { |
| "epoch": 0.5539478031053848, |
| "grad_norm": 5.15679931640625, |
| "learning_rate": 4.925908545431537e-07, |
| "logits/chosen": -0.728940486907959, |
| "logits/rejected": -0.7355924248695374, |
| "logps/chosen": -1.3356778621673584, |
| "logps/rejected": -1.6726096868515015, |
| "loss": 1.1434, |
| "rewards/accuracies": 0.6953125, |
| "rewards/chosen": -2.671355724334717, |
| "rewards/margins": 0.6738637685775757, |
| "rewards/rejected": -3.345219373703003, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.5560621076973902, |
| "grad_norm": 2.481048345565796, |
| "learning_rate": 4.888867902627543e-07, |
| "logits/chosen": -0.8311591148376465, |
| "logits/rejected": -0.8191719055175781, |
| "logps/chosen": -1.2743335962295532, |
| "logps/rejected": -1.5339927673339844, |
| "loss": 1.2164, |
| "rewards/accuracies": 0.6640625, |
| "rewards/chosen": -2.5486671924591064, |
| "rewards/margins": 0.5193185210227966, |
| "rewards/rejected": -3.0679855346679688, |
| "step": 263 |
| }, |
| { |
| "epoch": 0.5581764122893954, |
| "grad_norm": 3.6758291721343994, |
| "learning_rate": 4.851833360887201e-07, |
| "logits/chosen": -0.6787989735603333, |
| "logits/rejected": -0.668928325176239, |
| "logps/chosen": -1.2278664112091064, |
| "logps/rejected": -1.4955706596374512, |
| "loss": 1.1942, |
| "rewards/accuracies": 0.7109375, |
| "rewards/chosen": -2.455732822418213, |
| "rewards/margins": 0.535408616065979, |
| "rewards/rejected": -2.9911413192749023, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.5602907168814008, |
| "grad_norm": 2.7282023429870605, |
| "learning_rate": 4.814806953377365e-07, |
| "logits/chosen": -0.7772133350372314, |
| "logits/rejected": -0.7689889073371887, |
| "logps/chosen": -1.1954048871994019, |
| "logps/rejected": -1.444943904876709, |
| "loss": 1.2686, |
| "rewards/accuracies": 0.59375, |
| "rewards/chosen": -2.3908097743988037, |
| "rewards/margins": 0.4990782141685486, |
| "rewards/rejected": -2.889887809753418, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.562405021473406, |
| "grad_norm": 2.8753116130828857, |
| "learning_rate": 4.777790712818323e-07, |
| "logits/chosen": -0.6946043968200684, |
| "logits/rejected": -0.7001516819000244, |
| "logps/chosen": -1.2844620943069458, |
| "logps/rejected": -1.486103892326355, |
| "loss": 1.284, |
| "rewards/accuracies": 0.5703125, |
| "rewards/chosen": -2.5689241886138916, |
| "rewards/margins": 0.4032836854457855, |
| "rewards/rejected": -2.97220778465271, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.5645193260654113, |
| "grad_norm": 1.5583593845367432, |
| "learning_rate": 4.740786671372209e-07, |
| "logits/chosen": -0.7396820187568665, |
| "logits/rejected": -0.7129873037338257, |
| "logps/chosen": -1.410097599029541, |
| "logps/rejected": -1.6091456413269043, |
| "loss": 1.3158, |
| "rewards/accuracies": 0.640625, |
| "rewards/chosen": -2.820195198059082, |
| "rewards/margins": 0.3980959951877594, |
| "rewards/rejected": -3.2182912826538086, |
| "step": 267 |
| }, |
| { |
| "epoch": 0.5666336306574166, |
| "grad_norm": 3.5984952449798584, |
| "learning_rate": 4.703796860531429e-07, |
| "logits/chosen": -0.7031830549240112, |
| "logits/rejected": -0.700330376625061, |
| "logps/chosen": -1.633664608001709, |
| "logps/rejected": -1.9186874628067017, |
| "loss": 1.2479, |
| "rewards/accuracies": 0.6796875, |
| "rewards/chosen": -3.267329216003418, |
| "rewards/margins": 0.5700456500053406, |
| "rewards/rejected": -3.8373749256134033, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.5687479352494219, |
| "grad_norm": 6.295733451843262, |
| "learning_rate": 4.666823311007144e-07, |
| "logits/chosen": -0.8001950979232788, |
| "logits/rejected": -0.8042099475860596, |
| "logps/chosen": -1.5675832033157349, |
| "logps/rejected": -1.9247075319290161, |
| "loss": 1.1759, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -3.1351664066314697, |
| "rewards/margins": 0.7142485976219177, |
| "rewards/rejected": -3.8494150638580322, |
| "step": 269 |
| }, |
| { |
| "epoch": 0.5708622398414271, |
| "grad_norm": 3.6349036693573, |
| "learning_rate": 4.6298680526177855e-07, |
| "logits/chosen": -0.8108068704605103, |
| "logits/rejected": -0.8030902147293091, |
| "logps/chosen": -1.8205997943878174, |
| "logps/rejected": -2.195197105407715, |
| "loss": 1.1864, |
| "rewards/accuracies": 0.6796875, |
| "rewards/chosen": -3.6411995887756348, |
| "rewards/margins": 0.7491948008537292, |
| "rewards/rejected": -4.39039421081543, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.5729765444334325, |
| "grad_norm": 4.786395072937012, |
| "learning_rate": 4.59293311417761e-07, |
| "logits/chosen": -0.798182487487793, |
| "logits/rejected": -0.7736828327178955, |
| "logps/chosen": -1.8617057800292969, |
| "logps/rejected": -2.08984637260437, |
| "loss": 1.3947, |
| "rewards/accuracies": 0.6328125, |
| "rewards/chosen": -3.7234115600585938, |
| "rewards/margins": 0.4562810957431793, |
| "rewards/rejected": -4.17969274520874, |
| "step": 271 |
| }, |
| { |
| "epoch": 0.5750908490254377, |
| "grad_norm": 6.7946457862854, |
| "learning_rate": 4.556020523385326e-07, |
| "logits/chosen": -0.7530428171157837, |
| "logits/rejected": -0.7395590543746948, |
| "logps/chosen": -1.8709862232208252, |
| "logps/rejected": -2.3599390983581543, |
| "loss": 1.1025, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -3.7419724464416504, |
| "rewards/margins": 0.9779053926467896, |
| "rewards/rejected": -4.719878196716309, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.5772051536174431, |
| "grad_norm": 4.877624988555908, |
| "learning_rate": 4.5191323067127773e-07, |
| "logits/chosen": -0.7732480764389038, |
| "logits/rejected": -0.7835702300071716, |
| "logps/chosen": -2.0340800285339355, |
| "logps/rejected": -2.330742835998535, |
| "loss": 1.3198, |
| "rewards/accuracies": 0.6328125, |
| "rewards/chosen": -4.068160057067871, |
| "rewards/margins": 0.5933258533477783, |
| "rewards/rejected": -4.66148567199707, |
| "step": 273 |
| }, |
| { |
| "epoch": 0.5793194582094483, |
| "grad_norm": 9.001680374145508, |
| "learning_rate": 4.482270489293685e-07, |
| "logits/chosen": -0.9062263369560242, |
| "logits/rejected": -0.9105854630470276, |
| "logps/chosen": -2.1364972591400146, |
| "logps/rejected": -2.4467523097991943, |
| "loss": 1.3464, |
| "rewards/accuracies": 0.6328125, |
| "rewards/chosen": -4.272994518280029, |
| "rewards/margins": 0.6205099821090698, |
| "rewards/rejected": -4.893504619598389, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.5814337628014535, |
| "grad_norm": 2.811025619506836, |
| "learning_rate": 4.445437094812475e-07, |
| "logits/chosen": -0.8593579530715942, |
| "logits/rejected": -0.8343831896781921, |
| "logps/chosen": -2.452843189239502, |
| "logps/rejected": -2.7551848888397217, |
| "loss": 1.3536, |
| "rewards/accuracies": 0.5859375, |
| "rewards/chosen": -4.905686378479004, |
| "rewards/margins": 0.6046838760375977, |
| "rewards/rejected": -5.510369777679443, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.5835480673934589, |
| "grad_norm": 2.2030158042907715, |
| "learning_rate": 4.4086341453931714e-07, |
| "logits/chosen": -0.8991417288780212, |
| "logits/rejected": -0.8766486644744873, |
| "logps/chosen": -2.30641508102417, |
| "logps/rejected": -2.7606654167175293, |
| "loss": 1.1708, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -4.61283016204834, |
| "rewards/margins": 0.9085015654563904, |
| "rewards/rejected": -5.521330833435059, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.5856623719854641, |
| "grad_norm": 5.5185227394104, |
| "learning_rate": 4.371863661488393e-07, |
| "logits/chosen": -0.8738227486610413, |
| "logits/rejected": -0.8665530681610107, |
| "logps/chosen": -2.29125714302063, |
| "logps/rejected": -2.7014153003692627, |
| "loss": 1.1883, |
| "rewards/accuracies": 0.6484375, |
| "rewards/chosen": -4.58251428604126, |
| "rewards/margins": 0.8203167915344238, |
| "rewards/rejected": -5.402830600738525, |
| "step": 277 |
| }, |
| { |
| "epoch": 0.5877766765774695, |
| "grad_norm": 2.0779521465301514, |
| "learning_rate": 4.3351276617684285e-07, |
| "logits/chosen": -0.958415150642395, |
| "logits/rejected": -0.9585077166557312, |
| "logps/chosen": -2.4368410110473633, |
| "logps/rejected": -2.798506736755371, |
| "loss": 1.1749, |
| "rewards/accuracies": 0.6484375, |
| "rewards/chosen": -4.873682022094727, |
| "rewards/margins": 0.7233313322067261, |
| "rewards/rejected": -5.597013473510742, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.5898909811694747, |
| "grad_norm": 2.884877920150757, |
| "learning_rate": 4.29842816301041e-07, |
| "logits/chosen": -0.9413051605224609, |
| "logits/rejected": -0.9224691987037659, |
| "logps/chosen": -2.485034942626953, |
| "logps/rejected": -2.911332368850708, |
| "loss": 1.2035, |
| "rewards/accuracies": 0.6171875, |
| "rewards/chosen": -4.970069885253906, |
| "rewards/margins": 0.8525944948196411, |
| "rewards/rejected": -5.822664737701416, |
| "step": 279 |
| }, |
| { |
| "epoch": 0.59200528576148, |
| "grad_norm": 5.203248500823975, |
| "learning_rate": 4.2617671799875944e-07, |
| "logits/chosen": -0.9359334111213684, |
| "logits/rejected": -0.9387660026550293, |
| "logps/chosen": -2.378349542617798, |
| "logps/rejected": -2.730886936187744, |
| "loss": 1.2253, |
| "rewards/accuracies": 0.7109375, |
| "rewards/chosen": -4.756699085235596, |
| "rewards/margins": 0.7050745487213135, |
| "rewards/rejected": -5.461773872375488, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.5941195903534853, |
| "grad_norm": 6.818525314331055, |
| "learning_rate": 4.225146725358758e-07, |
| "logits/chosen": -0.8864554166793823, |
| "logits/rejected": -0.8813320398330688, |
| "logps/chosen": -2.4233975410461426, |
| "logps/rejected": -2.8188178539276123, |
| "loss": 1.2281, |
| "rewards/accuracies": 0.6640625, |
| "rewards/chosen": -4.846795082092285, |
| "rewards/margins": 0.7908411622047424, |
| "rewards/rejected": -5.637635707855225, |
| "step": 281 |
| }, |
| { |
| "epoch": 0.5962338949454906, |
| "grad_norm": 2.529154062271118, |
| "learning_rate": 4.1885688095577e-07, |
| "logits/chosen": -0.8420325517654419, |
| "logits/rejected": -0.8822402954101562, |
| "logps/chosen": -2.626488447189331, |
| "logps/rejected": -3.1887192726135254, |
| "loss": 1.0561, |
| "rewards/accuracies": 0.6953125, |
| "rewards/chosen": -5.252976894378662, |
| "rewards/margins": 1.1244611740112305, |
| "rewards/rejected": -6.377438545227051, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.5983481995374959, |
| "grad_norm": 3.0739686489105225, |
| "learning_rate": 4.152035440682873e-07, |
| "logits/chosen": -0.8550993204116821, |
| "logits/rejected": -0.8528580665588379, |
| "logps/chosen": -2.6387887001037598, |
| "logps/rejected": -2.9952192306518555, |
| "loss": 1.3409, |
| "rewards/accuracies": 0.6015625, |
| "rewards/chosen": -5.2775774002075195, |
| "rewards/margins": 0.7128612399101257, |
| "rewards/rejected": -5.990438461303711, |
| "step": 283 |
| }, |
| { |
| "epoch": 0.6004625041295012, |
| "grad_norm": 3.6649062633514404, |
| "learning_rate": 4.1155486243871363e-07, |
| "logits/chosen": -0.8643282651901245, |
| "logits/rejected": -0.9175342321395874, |
| "logps/chosen": -2.929072618484497, |
| "logps/rejected": -3.105940580368042, |
| "loss": 1.5121, |
| "rewards/accuracies": 0.578125, |
| "rewards/chosen": -5.858145236968994, |
| "rewards/margins": 0.3537355065345764, |
| "rewards/rejected": -6.211881160736084, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.6025768087215064, |
| "grad_norm": 2.5071723461151123, |
| "learning_rate": 4.0791103637676486e-07, |
| "logits/chosen": -0.8368631601333618, |
| "logits/rejected": -0.819808304309845, |
| "logps/chosen": -3.0672600269317627, |
| "logps/rejected": -3.4685003757476807, |
| "loss": 1.3236, |
| "rewards/accuracies": 0.65625, |
| "rewards/chosen": -6.134520053863525, |
| "rewards/margins": 0.8024805784225464, |
| "rewards/rejected": -6.937000751495361, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.6046911133135118, |
| "grad_norm": 8.780280113220215, |
| "learning_rate": 4.042722659255906e-07, |
| "logits/chosen": -0.8249569535255432, |
| "logits/rejected": -0.8442113995552063, |
| "logps/chosen": -3.3199872970581055, |
| "logps/rejected": -3.7276291847229004, |
| "loss": 1.322, |
| "rewards/accuracies": 0.6796875, |
| "rewards/chosen": -6.639974594116211, |
| "rewards/margins": 0.8152831792831421, |
| "rewards/rejected": -7.455258369445801, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.606805417905517, |
| "grad_norm": 3.4388678073883057, |
| "learning_rate": 4.006387508507914e-07, |
| "logits/chosen": -0.7224047780036926, |
| "logits/rejected": -0.7616450786590576, |
| "logps/chosen": -2.9411330223083496, |
| "logps/rejected": -3.32680082321167, |
| "loss": 1.2868, |
| "rewards/accuracies": 0.6640625, |
| "rewards/chosen": -5.882266044616699, |
| "rewards/margins": 0.7713361978530884, |
| "rewards/rejected": -6.65360164642334, |
| "step": 287 |
| }, |
| { |
| "epoch": 0.6089197224975224, |
| "grad_norm": 5.095273971557617, |
| "learning_rate": 3.970106906294509e-07, |
| "logits/chosen": -0.7394692897796631, |
| "logits/rejected": -0.7316830158233643, |
| "logps/chosen": -2.9902045726776123, |
| "logps/rejected": -3.469916820526123, |
| "loss": 1.1694, |
| "rewards/accuracies": 0.6953125, |
| "rewards/chosen": -5.980409145355225, |
| "rewards/margins": 0.9594244360923767, |
| "rewards/rejected": -6.939833641052246, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.6110340270895276, |
| "grad_norm": 2.1398613452911377, |
| "learning_rate": 3.933882844391866e-07, |
| "logits/chosen": -0.8181312084197998, |
| "logits/rejected": -0.833306610584259, |
| "logps/chosen": -3.0137529373168945, |
| "logps/rejected": -3.4241840839385986, |
| "loss": 1.2453, |
| "rewards/accuracies": 0.609375, |
| "rewards/chosen": -6.027505874633789, |
| "rewards/margins": 0.8208625316619873, |
| "rewards/rejected": -6.848368167877197, |
| "step": 289 |
| }, |
| { |
| "epoch": 0.6131483316815328, |
| "grad_norm": 4.185284614562988, |
| "learning_rate": 3.89771731147214e-07, |
| "logits/chosen": -0.7805104851722717, |
| "logits/rejected": -0.8086984753608704, |
| "logps/chosen": -2.984957218170166, |
| "logps/rejected": -3.430112361907959, |
| "loss": 1.2671, |
| "rewards/accuracies": 0.5859375, |
| "rewards/chosen": -5.969914436340332, |
| "rewards/margins": 0.890310525894165, |
| "rewards/rejected": -6.860224723815918, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.6152626362735382, |
| "grad_norm": 7.104829788208008, |
| "learning_rate": 3.861612292994292e-07, |
| "logits/chosen": -0.7788286209106445, |
| "logits/rejected": -0.8027424216270447, |
| "logps/chosen": -2.896563768386841, |
| "logps/rejected": -3.1082046031951904, |
| "loss": 1.4853, |
| "rewards/accuracies": 0.578125, |
| "rewards/chosen": -5.793127536773682, |
| "rewards/margins": 0.42328107357025146, |
| "rewards/rejected": -6.216409206390381, |
| "step": 291 |
| }, |
| { |
| "epoch": 0.6173769408655434, |
| "grad_norm": 3.795579433441162, |
| "learning_rate": 3.825569771095082e-07, |
| "logits/chosen": -0.8044757843017578, |
| "logits/rejected": -0.7828265428543091, |
| "logps/chosen": -2.8059256076812744, |
| "logps/rejected": -3.3121094703674316, |
| "loss": 1.1299, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -5.611851215362549, |
| "rewards/margins": 1.0123679637908936, |
| "rewards/rejected": -6.624218940734863, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.6194912454575487, |
| "grad_norm": 4.486142158508301, |
| "learning_rate": 3.7895917244802655e-07, |
| "logits/chosen": -0.7511788606643677, |
| "logits/rejected": -0.7885503768920898, |
| "logps/chosen": -2.927251100540161, |
| "logps/rejected": -3.1605303287506104, |
| "loss": 1.426, |
| "rewards/accuracies": 0.6015625, |
| "rewards/chosen": -5.854502201080322, |
| "rewards/margins": 0.466558575630188, |
| "rewards/rejected": -6.321060657501221, |
| "step": 293 |
| }, |
| { |
| "epoch": 0.621605550049554, |
| "grad_norm": 3.3942787647247314, |
| "learning_rate": 3.753680128315952e-07, |
| "logits/chosen": -0.8230300545692444, |
| "logits/rejected": -0.8042524456977844, |
| "logps/chosen": -2.524353504180908, |
| "logps/rejected": -2.8687357902526855, |
| "loss": 1.2653, |
| "rewards/accuracies": 0.6640625, |
| "rewards/chosen": -5.048707008361816, |
| "rewards/margins": 0.6887640953063965, |
| "rewards/rejected": -5.737471580505371, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.6237198546415593, |
| "grad_norm": 4.326812744140625, |
| "learning_rate": 3.717836954120178e-07, |
| "logits/chosen": -0.7763381004333496, |
| "logits/rejected": -0.7852378487586975, |
| "logps/chosen": -2.4861948490142822, |
| "logps/rejected": -2.8822267055511475, |
| "loss": 1.124, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -4.9723896980285645, |
| "rewards/margins": 0.7920635938644409, |
| "rewards/rejected": -5.764453411102295, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.6258341592335646, |
| "grad_norm": 3.886293649673462, |
| "learning_rate": 3.6820641696546627e-07, |
| "logits/chosen": -0.8350138664245605, |
| "logits/rejected": -0.8594292998313904, |
| "logps/chosen": -2.1301493644714355, |
| "logps/rejected": -2.3678014278411865, |
| "loss": 1.3532, |
| "rewards/accuracies": 0.5859375, |
| "rewards/chosen": -4.260298728942871, |
| "rewards/margins": 0.4753049314022064, |
| "rewards/rejected": -4.735602855682373, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.6279484638255699, |
| "grad_norm": 1.9318888187408447, |
| "learning_rate": 3.6463637388167875e-07, |
| "logits/chosen": -0.812870979309082, |
| "logits/rejected": -0.8393633961677551, |
| "logps/chosen": -2.0607728958129883, |
| "logps/rejected": -2.4457521438598633, |
| "loss": 1.2317, |
| "rewards/accuracies": 0.609375, |
| "rewards/chosen": -4.121545791625977, |
| "rewards/margins": 0.76995849609375, |
| "rewards/rejected": -4.891504287719727, |
| "step": 297 |
| }, |
| { |
| "epoch": 0.6300627684175751, |
| "grad_norm": 2.731139898300171, |
| "learning_rate": 3.610737621531781e-07, |
| "logits/chosen": -0.7860711216926575, |
| "logits/rejected": -0.8006534576416016, |
| "logps/chosen": -1.9324530363082886, |
| "logps/rejected": -2.2838711738586426, |
| "loss": 1.2986, |
| "rewards/accuracies": 0.65625, |
| "rewards/chosen": -3.864906072616577, |
| "rewards/margins": 0.7028359174728394, |
| "rewards/rejected": -4.567742347717285, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.6321770730095805, |
| "grad_norm": 3.118441581726074, |
| "learning_rate": 3.575187773645112e-07, |
| "logits/chosen": -0.6946629285812378, |
| "logits/rejected": -0.6832380294799805, |
| "logps/chosen": -2.2569775581359863, |
| "logps/rejected": -2.6153128147125244, |
| "loss": 1.2166, |
| "rewards/accuracies": 0.6171875, |
| "rewards/chosen": -4.513955116271973, |
| "rewards/margins": 0.7166703343391418, |
| "rewards/rejected": -5.230625629425049, |
| "step": 299 |
| }, |
| { |
| "epoch": 0.6342913776015857, |
| "grad_norm": 4.998100757598877, |
| "learning_rate": 3.5397161468151214e-07, |
| "logits/chosen": -0.7972643375396729, |
| "logits/rejected": -0.7864660620689392, |
| "logps/chosen": -2.227022886276245, |
| "logps/rejected": -2.57175350189209, |
| "loss": 1.2796, |
| "rewards/accuracies": 0.6328125, |
| "rewards/chosen": -4.45404577255249, |
| "rewards/margins": 0.6894608736038208, |
| "rewards/rejected": -5.14350700378418, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.6364056821935911, |
| "grad_norm": 6.259451866149902, |
| "learning_rate": 3.5043246884058777e-07, |
| "logits/chosen": -0.6282143592834473, |
| "logits/rejected": -0.6314865350723267, |
| "logps/chosen": -2.4372308254241943, |
| "logps/rejected": -2.8582205772399902, |
| "loss": 1.1592, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -4.874461650848389, |
| "rewards/margins": 0.8419792056083679, |
| "rewards/rejected": -5.7164411544799805, |
| "step": 301 |
| }, |
| { |
| "epoch": 0.6385199867855963, |
| "grad_norm": 2.577531337738037, |
| "learning_rate": 3.4690153413802653e-07, |
| "logits/chosen": -0.658220648765564, |
| "logits/rejected": -0.6330516934394836, |
| "logps/chosen": -2.6647050380706787, |
| "logps/rejected": -3.1917996406555176, |
| "loss": 1.2609, |
| "rewards/accuracies": 0.671875, |
| "rewards/chosen": -5.329410076141357, |
| "rewards/margins": 1.0541892051696777, |
| "rewards/rejected": -6.383599281311035, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.6406342913776016, |
| "grad_norm": 4.733935356140137, |
| "learning_rate": 3.4337900441933227e-07, |
| "logits/chosen": -0.5048555731773376, |
| "logits/rejected": -0.45112305879592896, |
| "logps/chosen": -2.5193920135498047, |
| "logps/rejected": -3.1279971599578857, |
| "loss": 1.0648, |
| "rewards/accuracies": 0.765625, |
| "rewards/chosen": -5.038784027099609, |
| "rewards/margins": 1.2172104120254517, |
| "rewards/rejected": -6.2559943199157715, |
| "step": 303 |
| }, |
| { |
| "epoch": 0.6427485959696069, |
| "grad_norm": 5.54962158203125, |
| "learning_rate": 3.3986507306858125e-07, |
| "logits/chosen": -0.5305406451225281, |
| "logits/rejected": -0.5246613025665283, |
| "logps/chosen": -2.8851962089538574, |
| "logps/rejected": -3.248018264770508, |
| "loss": 1.4329, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -5.770392417907715, |
| "rewards/margins": 0.7256444692611694, |
| "rewards/rejected": -6.496036529541016, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.6448629005616121, |
| "grad_norm": 2.827944278717041, |
| "learning_rate": 3.363599329978066e-07, |
| "logits/chosen": -0.4795135259628296, |
| "logits/rejected": -0.4911767244338989, |
| "logps/chosen": -3.0268373489379883, |
| "logps/rejected": -3.4411511421203613, |
| "loss": 1.4083, |
| "rewards/accuracies": 0.65625, |
| "rewards/chosen": -6.053674697875977, |
| "rewards/margins": 0.8286278247833252, |
| "rewards/rejected": -6.882302284240723, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.6469772051536175, |
| "grad_norm": 5.35672664642334, |
| "learning_rate": 3.328637766364075e-07, |
| "logits/chosen": -0.4823904037475586, |
| "logits/rejected": -0.48555058240890503, |
| "logps/chosen": -2.990793466567993, |
| "logps/rejected": -3.529240846633911, |
| "loss": 1.1417, |
| "rewards/accuracies": 0.6796875, |
| "rewards/chosen": -5.981586933135986, |
| "rewards/margins": 1.0768945217132568, |
| "rewards/rejected": -7.058481693267822, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.6490915097456227, |
| "grad_norm": 2.8072359561920166, |
| "learning_rate": 3.2937679592058396e-07, |
| "logits/chosen": -0.4903571605682373, |
| "logits/rejected": -0.46411609649658203, |
| "logps/chosen": -2.8665530681610107, |
| "logps/rejected": -3.542123556137085, |
| "loss": 1.2485, |
| "rewards/accuracies": 0.6640625, |
| "rewards/chosen": -5.7331061363220215, |
| "rewards/margins": 1.3511409759521484, |
| "rewards/rejected": -7.08424711227417, |
| "step": 307 |
| }, |
| { |
| "epoch": 0.651205814337628, |
| "grad_norm": 6.341434478759766, |
| "learning_rate": 3.2589918228280066e-07, |
| "logits/chosen": -0.4496378004550934, |
| "logits/rejected": -0.35389459133148193, |
| "logps/chosen": -2.8208916187286377, |
| "logps/rejected": -3.326601505279541, |
| "loss": 1.3089, |
| "rewards/accuracies": 0.6171875, |
| "rewards/chosen": -5.641783237457275, |
| "rewards/margins": 1.011419653892517, |
| "rewards/rejected": -6.653203010559082, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.6533201189296333, |
| "grad_norm": 2.5416784286499023, |
| "learning_rate": 3.2243112664127723e-07, |
| "logits/chosen": -0.44504135847091675, |
| "logits/rejected": -0.42088568210601807, |
| "logps/chosen": -2.7710533142089844, |
| "logps/rejected": -3.4406185150146484, |
| "loss": 1.2213, |
| "rewards/accuracies": 0.6640625, |
| "rewards/chosen": -5.542106628417969, |
| "rewards/margins": 1.3391309976577759, |
| "rewards/rejected": -6.881237030029297, |
| "step": 309 |
| }, |
| { |
| "epoch": 0.6554344235216386, |
| "grad_norm": 4.573229789733887, |
| "learning_rate": 3.189728193895069e-07, |
| "logits/chosen": -0.31100764870643616, |
| "logits/rejected": -0.32552966475486755, |
| "logps/chosen": -3.099289655685425, |
| "logps/rejected": -3.5152204036712646, |
| "loss": 1.3571, |
| "rewards/accuracies": 0.6015625, |
| "rewards/chosen": -6.19857931137085, |
| "rewards/margins": 0.8318620324134827, |
| "rewards/rejected": -7.030440807342529, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.6575487281136438, |
| "grad_norm": 3.7587928771972656, |
| "learning_rate": 3.155244503858041e-07, |
| "logits/chosen": -0.4225979447364807, |
| "logits/rejected": -0.43882372975349426, |
| "logps/chosen": -2.9082608222961426, |
| "logps/rejected": -3.2239482402801514, |
| "loss": 1.3415, |
| "rewards/accuracies": 0.6953125, |
| "rewards/chosen": -5.816521644592285, |
| "rewards/margins": 0.6313749551773071, |
| "rewards/rejected": -6.447896480560303, |
| "step": 311 |
| }, |
| { |
| "epoch": 0.6596630327056492, |
| "grad_norm": 5.79728889465332, |
| "learning_rate": 3.12086208942881e-07, |
| "logits/chosen": -0.48076939582824707, |
| "logits/rejected": -0.41990721225738525, |
| "logps/chosen": -2.7089650630950928, |
| "logps/rejected": -3.29990291595459, |
| "loss": 1.1423, |
| "rewards/accuracies": 0.6640625, |
| "rewards/chosen": -5.4179301261901855, |
| "rewards/margins": 1.181876540184021, |
| "rewards/rejected": -6.59980583190918, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.6617773372976544, |
| "grad_norm": 7.405224800109863, |
| "learning_rate": 3.086582838174551e-07, |
| "logits/chosen": -0.48003631830215454, |
| "logits/rejected": -0.40571871399879456, |
| "logps/chosen": -2.53741455078125, |
| "logps/rejected": -3.0145747661590576, |
| "loss": 1.3247, |
| "rewards/accuracies": 0.609375, |
| "rewards/chosen": -5.0748291015625, |
| "rewards/margins": 0.9543203115463257, |
| "rewards/rejected": -6.029149532318115, |
| "step": 313 |
| }, |
| { |
| "epoch": 0.6638916418896598, |
| "grad_norm": 6.371465682983398, |
| "learning_rate": 3.052408631998863e-07, |
| "logits/chosen": -0.42537638545036316, |
| "logits/rejected": -0.39384835958480835, |
| "logps/chosen": -3.006593942642212, |
| "logps/rejected": -3.4665465354919434, |
| "loss": 1.2648, |
| "rewards/accuracies": 0.6328125, |
| "rewards/chosen": -6.013187885284424, |
| "rewards/margins": 0.919904887676239, |
| "rewards/rejected": -6.933093070983887, |
| "step": 314 |
| }, |
| { |
| "epoch": 0.666005946481665, |
| "grad_norm": 4.65411376953125, |
| "learning_rate": 3.018341347038453e-07, |
| "logits/chosen": -0.38848310708999634, |
| "logits/rejected": -0.3435167670249939, |
| "logps/chosen": -2.9562084674835205, |
| "logps/rejected": -3.5491316318511963, |
| "loss": 1.1353, |
| "rewards/accuracies": 0.6796875, |
| "rewards/chosen": -5.912416934967041, |
| "rewards/margins": 1.1858452558517456, |
| "rewards/rejected": -7.098263263702393, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.6681202510736703, |
| "grad_norm": 5.089771747589111, |
| "learning_rate": 2.9843828535601397e-07, |
| "logits/chosen": -0.3452882170677185, |
| "logits/rejected": -0.29303884506225586, |
| "logps/chosen": -2.5367987155914307, |
| "logps/rejected": -3.172724723815918, |
| "loss": 1.2002, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -5.073597431182861, |
| "rewards/margins": 1.2718524932861328, |
| "rewards/rejected": -6.345449447631836, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.6702345556656756, |
| "grad_norm": 4.480255603790283, |
| "learning_rate": 2.9505350158581697e-07, |
| "logits/chosen": -0.47401517629623413, |
| "logits/rejected": -0.45950815081596375, |
| "logps/chosen": -2.45076322555542, |
| "logps/rejected": -2.998079299926758, |
| "loss": 1.2545, |
| "rewards/accuracies": 0.6171875, |
| "rewards/chosen": -4.90152645111084, |
| "rewards/margins": 1.0946320295333862, |
| "rewards/rejected": -5.996158599853516, |
| "step": 317 |
| }, |
| { |
| "epoch": 0.6723488602576809, |
| "grad_norm": 3.6318399906158447, |
| "learning_rate": 2.916799692151884e-07, |
| "logits/chosen": -0.20774951577186584, |
| "logits/rejected": -0.21114808320999146, |
| "logps/chosen": -2.8932981491088867, |
| "logps/rejected": -3.613022565841675, |
| "loss": 1.1187, |
| "rewards/accuracies": 0.6953125, |
| "rewards/chosen": -5.786596298217773, |
| "rewards/margins": 1.4394491910934448, |
| "rewards/rejected": -7.22604513168335, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.6744631648496862, |
| "grad_norm": 6.601771831512451, |
| "learning_rate": 2.883178734483692e-07, |
| "logits/chosen": -0.3821495473384857, |
| "logits/rejected": -0.35181915760040283, |
| "logps/chosen": -2.5047662258148193, |
| "logps/rejected": -3.074918270111084, |
| "loss": 1.1545, |
| "rewards/accuracies": 0.71875, |
| "rewards/chosen": -5.009532451629639, |
| "rewards/margins": 1.1403042078018188, |
| "rewards/rejected": -6.149836540222168, |
| "step": 319 |
| }, |
| { |
| "epoch": 0.6765774694416914, |
| "grad_norm": 3.077775716781616, |
| "learning_rate": 2.849673988617399e-07, |
| "logits/chosen": -0.4517952799797058, |
| "logits/rejected": -0.3880998194217682, |
| "logps/chosen": -2.5404443740844727, |
| "logps/rejected": -3.007855176925659, |
| "loss": 1.2441, |
| "rewards/accuracies": 0.6484375, |
| "rewards/chosen": -5.080888748168945, |
| "rewards/margins": 0.9348208904266357, |
| "rewards/rejected": -6.015710353851318, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.6786917740336967, |
| "grad_norm": 4.130971908569336, |
| "learning_rate": 2.8162872939368674e-07, |
| "logits/chosen": -0.3455219566822052, |
| "logits/rejected": -0.3199109137058258, |
| "logps/chosen": -2.5115320682525635, |
| "logps/rejected": -3.0809438228607178, |
| "loss": 1.1814, |
| "rewards/accuracies": 0.71875, |
| "rewards/chosen": -5.023064136505127, |
| "rewards/margins": 1.1388237476348877, |
| "rewards/rejected": -6.1618876457214355, |
| "step": 321 |
| }, |
| { |
| "epoch": 0.680806078625702, |
| "grad_norm": 6.414750099182129, |
| "learning_rate": 2.783020483345057e-07, |
| "logits/chosen": -0.500693142414093, |
| "logits/rejected": -0.43053722381591797, |
| "logps/chosen": -2.627499580383301, |
| "logps/rejected": -3.176882266998291, |
| "loss": 1.2207, |
| "rewards/accuracies": 0.6015625, |
| "rewards/chosen": -5.254999160766602, |
| "rewards/margins": 1.0987658500671387, |
| "rewards/rejected": -6.353764533996582, |
| "step": 322 |
| }, |
| { |
| "epoch": 0.6829203832177073, |
| "grad_norm": 3.8955185413360596, |
| "learning_rate": 2.749875383163377e-07, |
| "logits/chosen": -0.3386150896549225, |
| "logits/rejected": -0.3456903100013733, |
| "logps/chosen": -2.5545601844787598, |
| "logps/rejected": -3.0574111938476562, |
| "loss": 1.2667, |
| "rewards/accuracies": 0.6796875, |
| "rewards/chosen": -5.1091203689575195, |
| "rewards/margins": 1.0057018995285034, |
| "rewards/rejected": -6.1148223876953125, |
| "step": 323 |
| }, |
| { |
| "epoch": 0.6850346878097126, |
| "grad_norm": 4.244959831237793, |
| "learning_rate": 2.7168538130314345e-07, |
| "logits/chosen": -0.4657687246799469, |
| "logits/rejected": -0.41878795623779297, |
| "logps/chosen": -2.3406989574432373, |
| "logps/rejected": -2.74613094329834, |
| "loss": 1.2982, |
| "rewards/accuracies": 0.640625, |
| "rewards/chosen": -4.681397914886475, |
| "rewards/margins": 0.8108637928962708, |
| "rewards/rejected": -5.49226188659668, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.6871489924017179, |
| "grad_norm": 8.914139747619629, |
| "learning_rate": 2.683957585807136e-07, |
| "logits/chosen": -0.42120760679244995, |
| "logits/rejected": -0.34997111558914185, |
| "logps/chosen": -2.4362924098968506, |
| "logps/rejected": -2.8844237327575684, |
| "loss": 1.3185, |
| "rewards/accuracies": 0.6484375, |
| "rewards/chosen": -4.872584819793701, |
| "rewards/margins": 0.8962627649307251, |
| "rewards/rejected": -5.768847465515137, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.6892632969937231, |
| "grad_norm": 2.8318073749542236, |
| "learning_rate": 2.651188507467161e-07, |
| "logits/chosen": -0.4435175657272339, |
| "logits/rejected": -0.43688836693763733, |
| "logps/chosen": -2.316673994064331, |
| "logps/rejected": -2.6802306175231934, |
| "loss": 1.2727, |
| "rewards/accuracies": 0.65625, |
| "rewards/chosen": -4.633347988128662, |
| "rewards/margins": 0.7271134853363037, |
| "rewards/rejected": -5.360461235046387, |
| "step": 326 |
| }, |
| { |
| "epoch": 0.6913776015857285, |
| "grad_norm": 9.15845012664795, |
| "learning_rate": 2.618548377007817e-07, |
| "logits/chosen": -0.4659804105758667, |
| "logits/rejected": -0.43525823950767517, |
| "logps/chosen": -2.3177073001861572, |
| "logps/rejected": -2.674837350845337, |
| "loss": 1.3204, |
| "rewards/accuracies": 0.65625, |
| "rewards/chosen": -4.6354146003723145, |
| "rewards/margins": 0.7142605781555176, |
| "rewards/rejected": -5.349674701690674, |
| "step": 327 |
| }, |
| { |
| "epoch": 0.6934919061777337, |
| "grad_norm": 8.41653060913086, |
| "learning_rate": 2.5860389863462763e-07, |
| "logits/chosen": -0.42244386672973633, |
| "logits/rejected": -0.3488731384277344, |
| "logps/chosen": -2.3063669204711914, |
| "logps/rejected": -2.8124496936798096, |
| "loss": 1.2621, |
| "rewards/accuracies": 0.671875, |
| "rewards/chosen": -4.612733840942383, |
| "rewards/margins": 1.0121653079986572, |
| "rewards/rejected": -5.624899387359619, |
| "step": 328 |
| }, |
| { |
| "epoch": 0.695606210769739, |
| "grad_norm": 8.558746337890625, |
| "learning_rate": 2.5536621202221986e-07, |
| "logits/chosen": -0.4081762433052063, |
| "logits/rejected": -0.3913821578025818, |
| "logps/chosen": -2.331026554107666, |
| "logps/rejected": -2.799482583999634, |
| "loss": 1.2435, |
| "rewards/accuracies": 0.59375, |
| "rewards/chosen": -4.662053108215332, |
| "rewards/margins": 0.9369123578071594, |
| "rewards/rejected": -5.598965167999268, |
| "step": 329 |
| }, |
| { |
| "epoch": 0.6977205153617443, |
| "grad_norm": 7.550137519836426, |
| "learning_rate": 2.521419556099754e-07, |
| "logits/chosen": -0.5334538221359253, |
| "logits/rejected": -0.5046267509460449, |
| "logps/chosen": -2.3662197589874268, |
| "logps/rejected": -2.8178446292877197, |
| "loss": 1.2172, |
| "rewards/accuracies": 0.6796875, |
| "rewards/chosen": -4.7324395179748535, |
| "rewards/margins": 0.9032500386238098, |
| "rewards/rejected": -5.6356892585754395, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.6998348199537496, |
| "grad_norm": 4.939478397369385, |
| "learning_rate": 2.4893130640700364e-07, |
| "logits/chosen": -0.5103824138641357, |
| "logits/rejected": -0.49076637625694275, |
| "logps/chosen": -2.0302557945251465, |
| "logps/rejected": -2.4443471431732178, |
| "loss": 1.1939, |
| "rewards/accuracies": 0.65625, |
| "rewards/chosen": -4.060511589050293, |
| "rewards/margins": 0.8281831741333008, |
| "rewards/rejected": -4.8886942863464355, |
| "step": 331 |
| }, |
| { |
| "epoch": 0.7019491245457549, |
| "grad_norm": 5.584677219390869, |
| "learning_rate": 2.4573444067538985e-07, |
| "logits/chosen": -0.46035417914390564, |
| "logits/rejected": -0.4546043574810028, |
| "logps/chosen": -2.1907548904418945, |
| "logps/rejected": -2.4913454055786133, |
| "loss": 1.4253, |
| "rewards/accuracies": 0.6015625, |
| "rewards/chosen": -4.381509780883789, |
| "rewards/margins": 0.6011807322502136, |
| "rewards/rejected": -4.982690811157227, |
| "step": 332 |
| }, |
| { |
| "epoch": 0.7040634291377602, |
| "grad_norm": 3.398441791534424, |
| "learning_rate": 2.425515339205165e-07, |
| "logits/chosen": -0.5569466352462769, |
| "logits/rejected": -0.5756793022155762, |
| "logps/chosen": -2.037411689758301, |
| "logps/rejected": -2.3700244426727295, |
| "loss": 1.3425, |
| "rewards/accuracies": 0.5703125, |
| "rewards/chosen": -4.074823379516602, |
| "rewards/margins": 0.6652255654335022, |
| "rewards/rejected": -4.740048885345459, |
| "step": 333 |
| }, |
| { |
| "epoch": 0.7061777337297654, |
| "grad_norm": 8.54529094696045, |
| "learning_rate": 2.3938276088143e-07, |
| "logits/chosen": -0.5746757388114929, |
| "logits/rejected": -0.5874296426773071, |
| "logps/chosen": -2.1479601860046387, |
| "logps/rejected": -2.584625244140625, |
| "loss": 1.2366, |
| "rewards/accuracies": 0.71875, |
| "rewards/chosen": -4.295920372009277, |
| "rewards/margins": 0.8733301758766174, |
| "rewards/rejected": -5.16925048828125, |
| "step": 334 |
| }, |
| { |
| "epoch": 0.7082920383217707, |
| "grad_norm": 5.141815662384033, |
| "learning_rate": 2.362282955212473e-07, |
| "logits/chosen": -0.6492913961410522, |
| "logits/rejected": -0.5812432765960693, |
| "logps/chosen": -1.9753435850143433, |
| "logps/rejected": -2.340383768081665, |
| "loss": 1.2197, |
| "rewards/accuracies": 0.640625, |
| "rewards/chosen": -3.9506871700286865, |
| "rewards/margins": 0.7300805449485779, |
| "rewards/rejected": -4.68076753616333, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.710406342913776, |
| "grad_norm": 5.991698265075684, |
| "learning_rate": 2.3308831101760483e-07, |
| "logits/chosen": -0.6887751221656799, |
| "logits/rejected": -0.6923843622207642, |
| "logps/chosen": -1.577715277671814, |
| "logps/rejected": -1.861379623413086, |
| "loss": 1.2608, |
| "rewards/accuracies": 0.640625, |
| "rewards/chosen": -3.155430555343628, |
| "rewards/margins": 0.5673283338546753, |
| "rewards/rejected": -3.722759246826172, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.7125206475057813, |
| "grad_norm": 1.5719850063323975, |
| "learning_rate": 2.2996297975315097e-07, |
| "logits/chosen": -0.6095813512802124, |
| "logits/rejected": -0.5842909216880798, |
| "logps/chosen": -1.6973541975021362, |
| "logps/rejected": -2.1261086463928223, |
| "loss": 1.2424, |
| "rewards/accuracies": 0.609375, |
| "rewards/chosen": -3.3947083950042725, |
| "rewards/margins": 0.857509195804596, |
| "rewards/rejected": -4.2522172927856445, |
| "step": 337 |
| }, |
| { |
| "epoch": 0.7146349520977866, |
| "grad_norm": 4.785243511199951, |
| "learning_rate": 2.2685247330608414e-07, |
| "logits/chosen": -0.7062411308288574, |
| "logits/rejected": -0.6849475502967834, |
| "logps/chosen": -1.6659798622131348, |
| "logps/rejected": -1.980202555656433, |
| "loss": 1.2512, |
| "rewards/accuracies": 0.6171875, |
| "rewards/chosen": -3.3319597244262695, |
| "rewards/margins": 0.6284454464912415, |
| "rewards/rejected": -3.960405111312866, |
| "step": 338 |
| }, |
| { |
| "epoch": 0.7167492566897918, |
| "grad_norm": 4.3183674812316895, |
| "learning_rate": 2.2375696244073123e-07, |
| "logits/chosen": -0.6655697822570801, |
| "logits/rejected": -0.6642571687698364, |
| "logps/chosen": -1.615012764930725, |
| "logps/rejected": -1.9022549390792847, |
| "loss": 1.398, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -3.23002552986145, |
| "rewards/margins": 0.5744845867156982, |
| "rewards/rejected": -3.8045098781585693, |
| "step": 339 |
| }, |
| { |
| "epoch": 0.7188635612817972, |
| "grad_norm": 3.458740472793579, |
| "learning_rate": 2.2067661709817382e-07, |
| "logits/chosen": -0.6138105988502502, |
| "logits/rejected": -0.6241220235824585, |
| "logps/chosen": -1.5244299173355103, |
| "logps/rejected": -1.8252849578857422, |
| "loss": 1.2257, |
| "rewards/accuracies": 0.5859375, |
| "rewards/chosen": -3.0488598346710205, |
| "rewards/margins": 0.6017097234725952, |
| "rewards/rejected": -3.6505699157714844, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.7209778658738024, |
| "grad_norm": 3.3990859985351562, |
| "learning_rate": 2.1761160638691838e-07, |
| "logits/chosen": -0.596839964389801, |
| "logits/rejected": -0.5929630398750305, |
| "logps/chosen": -1.4333155155181885, |
| "logps/rejected": -1.820554494857788, |
| "loss": 1.1124, |
| "rewards/accuracies": 0.6953125, |
| "rewards/chosen": -2.866631031036377, |
| "rewards/margins": 0.7744779586791992, |
| "rewards/rejected": -3.641108989715576, |
| "step": 341 |
| }, |
| { |
| "epoch": 0.7230921704658078, |
| "grad_norm": 2.742016315460205, |
| "learning_rate": 2.1456209857361246e-07, |
| "logits/chosen": -0.6483213901519775, |
| "logits/rejected": -0.6418218612670898, |
| "logps/chosen": -1.4174959659576416, |
| "logps/rejected": -1.831233263015747, |
| "loss": 1.1372, |
| "rewards/accuracies": 0.703125, |
| "rewards/chosen": -2.834991931915283, |
| "rewards/margins": 0.8274745941162109, |
| "rewards/rejected": -3.662466526031494, |
| "step": 342 |
| }, |
| { |
| "epoch": 0.725206475057813, |
| "grad_norm": 2.5489015579223633, |
| "learning_rate": 2.1152826107380651e-07, |
| "logits/chosen": -0.599895179271698, |
| "logits/rejected": -0.6154446005821228, |
| "logps/chosen": -1.4996072053909302, |
| "logps/rejected": -1.7961615324020386, |
| "loss": 1.2288, |
| "rewards/accuracies": 0.6640625, |
| "rewards/chosen": -2.9992144107818604, |
| "rewards/margins": 0.5931087732315063, |
| "rewards/rejected": -3.592323064804077, |
| "step": 343 |
| }, |
| { |
| "epoch": 0.7273207796498183, |
| "grad_norm": 2.8836190700531006, |
| "learning_rate": 2.0851026044276405e-07, |
| "logits/chosen": -0.7359989285469055, |
| "logits/rejected": -0.7111036777496338, |
| "logps/chosen": -1.32615065574646, |
| "logps/rejected": -1.6067696809768677, |
| "loss": 1.2088, |
| "rewards/accuracies": 0.65625, |
| "rewards/chosen": -2.65230131149292, |
| "rewards/margins": 0.5612384080886841, |
| "rewards/rejected": -3.2135393619537354, |
| "step": 344 |
| }, |
| { |
| "epoch": 0.7294350842418236, |
| "grad_norm": 3.1838135719299316, |
| "learning_rate": 2.0550826236631596e-07, |
| "logits/chosen": -0.6709272265434265, |
| "logits/rejected": -0.6708023548126221, |
| "logps/chosen": -1.2859303951263428, |
| "logps/rejected": -1.6929675340652466, |
| "loss": 1.1446, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -2.5718607902526855, |
| "rewards/margins": 0.8140743374824524, |
| "rewards/rejected": -3.385935068130493, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.7315493888338289, |
| "grad_norm": 2.4209675788879395, |
| "learning_rate": 2.025224316517663e-07, |
| "logits/chosen": -0.7540403604507446, |
| "logits/rejected": -0.7601196765899658, |
| "logps/chosen": -1.3634543418884277, |
| "logps/rejected": -1.6112797260284424, |
| "loss": 1.2561, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -2.7269086837768555, |
| "rewards/margins": 0.4956510066986084, |
| "rewards/rejected": -3.2225594520568848, |
| "step": 346 |
| }, |
| { |
| "epoch": 0.7336636934258342, |
| "grad_norm": 5.405437469482422, |
| "learning_rate": 1.9955293221884402e-07, |
| "logits/chosen": -0.7241419553756714, |
| "logits/rejected": -0.7224253416061401, |
| "logps/chosen": -1.2650585174560547, |
| "logps/rejected": -1.639666199684143, |
| "loss": 1.1565, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -2.5301170349121094, |
| "rewards/margins": 0.7492151856422424, |
| "rewards/rejected": -3.279332399368286, |
| "step": 347 |
| }, |
| { |
| "epoch": 0.7357779980178395, |
| "grad_norm": 1.5863631963729858, |
| "learning_rate": 1.9659992709070344e-07, |
| "logits/chosen": -0.7479431629180908, |
| "logits/rejected": -0.7219806909561157, |
| "logps/chosen": -1.294840931892395, |
| "logps/rejected": -1.6082017421722412, |
| "loss": 1.1693, |
| "rewards/accuracies": 0.640625, |
| "rewards/chosen": -2.58968186378479, |
| "rewards/margins": 0.6267215013504028, |
| "rewards/rejected": -3.2164034843444824, |
| "step": 348 |
| }, |
| { |
| "epoch": 0.7378923026098447, |
| "grad_norm": 1.7051454782485962, |
| "learning_rate": 1.936635783849742e-07, |
| "logits/chosen": -0.6940132975578308, |
| "logits/rejected": -0.7377297878265381, |
| "logps/chosen": -1.1897408962249756, |
| "logps/rejected": -1.631073236465454, |
| "loss": 1.1069, |
| "rewards/accuracies": 0.6953125, |
| "rewards/chosen": -2.379481792449951, |
| "rewards/margins": 0.8826643228530884, |
| "rewards/rejected": -3.262146472930908, |
| "step": 349 |
| }, |
| { |
| "epoch": 0.74000660720185, |
| "grad_norm": 2.704514980316162, |
| "learning_rate": 1.907440473048626e-07, |
| "logits/chosen": -0.6926394104957581, |
| "logits/rejected": -0.7064180374145508, |
| "logps/chosen": -1.1691362857818604, |
| "logps/rejected": -1.511006236076355, |
| "loss": 1.1541, |
| "rewards/accuracies": 0.6640625, |
| "rewards/chosen": -2.3382725715637207, |
| "rewards/margins": 0.6837398409843445, |
| "rewards/rejected": -3.02201247215271, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.7421209117938553, |
| "grad_norm": 2.3685505390167236, |
| "learning_rate": 1.8784149413030004e-07, |
| "logits/chosen": -0.7785338759422302, |
| "logits/rejected": -0.7802280187606812, |
| "logps/chosen": -1.267012119293213, |
| "logps/rejected": -1.5235991477966309, |
| "loss": 1.177, |
| "rewards/accuracies": 0.65625, |
| "rewards/chosen": -2.534024238586426, |
| "rewards/margins": 0.5131738781929016, |
| "rewards/rejected": -3.0471982955932617, |
| "step": 351 |
| }, |
| { |
| "epoch": 0.7442352163858605, |
| "grad_norm": 2.8642280101776123, |
| "learning_rate": 1.849560782091445e-07, |
| "logits/chosen": -0.8269493579864502, |
| "logits/rejected": -0.8431333899497986, |
| "logps/chosen": -1.228893518447876, |
| "logps/rejected": -1.5784943103790283, |
| "loss": 1.1764, |
| "rewards/accuracies": 0.640625, |
| "rewards/chosen": -2.457787036895752, |
| "rewards/margins": 0.6992017030715942, |
| "rewards/rejected": -3.1569886207580566, |
| "step": 352 |
| }, |
| { |
| "epoch": 0.7463495209778659, |
| "grad_norm": 4.742166996002197, |
| "learning_rate": 1.8208795794843246e-07, |
| "logits/chosen": -0.764488160610199, |
| "logits/rejected": -0.7553139925003052, |
| "logps/chosen": -1.3095338344573975, |
| "logps/rejected": -1.6771752834320068, |
| "loss": 1.0957, |
| "rewards/accuracies": 0.7109375, |
| "rewards/chosen": -2.619067668914795, |
| "rewards/margins": 0.7352830171585083, |
| "rewards/rejected": -3.3543505668640137, |
| "step": 353 |
| }, |
| { |
| "epoch": 0.7484638255698711, |
| "grad_norm": 3.543769359588623, |
| "learning_rate": 1.7923729080568239e-07, |
| "logits/chosen": -0.7355642914772034, |
| "logits/rejected": -0.7744429707527161, |
| "logps/chosen": -1.3419017791748047, |
| "logps/rejected": -1.591749668121338, |
| "loss": 1.2579, |
| "rewards/accuracies": 0.6484375, |
| "rewards/chosen": -2.6838035583496094, |
| "rewards/margins": 0.4996955990791321, |
| "rewards/rejected": -3.183499336242676, |
| "step": 354 |
| }, |
| { |
| "epoch": 0.7505781301618765, |
| "grad_norm": 4.187947750091553, |
| "learning_rate": 1.764042332802506e-07, |
| "logits/chosen": -0.7009099721908569, |
| "logits/rejected": -0.6947562098503113, |
| "logps/chosen": -1.3167665004730225, |
| "logps/rejected": -1.640596866607666, |
| "loss": 1.2269, |
| "rewards/accuracies": 0.6171875, |
| "rewards/chosen": -2.633533000946045, |
| "rewards/margins": 0.6476608514785767, |
| "rewards/rejected": -3.281193733215332, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.7526924347538817, |
| "grad_norm": 1.7813458442687988, |
| "learning_rate": 1.7358894090473924e-07, |
| "logits/chosen": -0.7276792526245117, |
| "logits/rejected": -0.7536065578460693, |
| "logps/chosen": -1.401429295539856, |
| "logps/rejected": -1.7458314895629883, |
| "loss": 1.1934, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -2.802858591079712, |
| "rewards/margins": 0.6888045072555542, |
| "rewards/rejected": -3.4916629791259766, |
| "step": 356 |
| }, |
| { |
| "epoch": 0.754806739345887, |
| "grad_norm": 2.3192296028137207, |
| "learning_rate": 1.7079156823645801e-07, |
| "logits/chosen": -0.6756848096847534, |
| "logits/rejected": -0.6988381743431091, |
| "logps/chosen": -1.36654531955719, |
| "logps/rejected": -1.6672351360321045, |
| "loss": 1.1928, |
| "rewards/accuracies": 0.671875, |
| "rewards/chosen": -2.73309063911438, |
| "rewards/margins": 0.6013798117637634, |
| "rewards/rejected": -3.334470272064209, |
| "step": 357 |
| }, |
| { |
| "epoch": 0.7569210439378923, |
| "grad_norm": 2.7722420692443848, |
| "learning_rate": 1.6801226884893893e-07, |
| "logits/chosen": -0.6857397556304932, |
| "logits/rejected": -0.7169467806816101, |
| "logps/chosen": -1.4047114849090576, |
| "logps/rejected": -1.733205795288086, |
| "loss": 1.16, |
| "rewards/accuracies": 0.6796875, |
| "rewards/chosen": -2.8094229698181152, |
| "rewards/margins": 0.6569885015487671, |
| "rewards/rejected": -3.466411590576172, |
| "step": 358 |
| }, |
| { |
| "epoch": 0.7590353485298976, |
| "grad_norm": 6.300495624542236, |
| "learning_rate": 1.6525119532350506e-07, |
| "logits/chosen": -0.7457281947135925, |
| "logits/rejected": -0.7319377660751343, |
| "logps/chosen": -1.282365083694458, |
| "logps/rejected": -1.6675825119018555, |
| "loss": 1.0742, |
| "rewards/accuracies": 0.7265625, |
| "rewards/chosen": -2.564730167388916, |
| "rewards/margins": 0.7704350352287292, |
| "rewards/rejected": -3.335165023803711, |
| "step": 359 |
| }, |
| { |
| "epoch": 0.7611496531219029, |
| "grad_norm": 3.5068228244781494, |
| "learning_rate": 1.6250849924089482e-07, |
| "logits/chosen": -0.7112680077552795, |
| "logits/rejected": -0.7166794538497925, |
| "logps/chosen": -1.3996254205703735, |
| "logps/rejected": -1.6635833978652954, |
| "loss": 1.2438, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -2.799250841140747, |
| "rewards/margins": 0.5279159545898438, |
| "rewards/rejected": -3.327166795730591, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.7632639577139082, |
| "grad_norm": 1.421538233757019, |
| "learning_rate": 1.5978433117293883e-07, |
| "logits/chosen": -0.7009663581848145, |
| "logits/rejected": -0.6878695487976074, |
| "logps/chosen": -1.4174691438674927, |
| "logps/rejected": -1.802457332611084, |
| "loss": 1.0885, |
| "rewards/accuracies": 0.7265625, |
| "rewards/chosen": -2.8349382877349854, |
| "rewards/margins": 0.7699761986732483, |
| "rewards/rejected": -3.604914665222168, |
| "step": 361 |
| }, |
| { |
| "epoch": 0.7653782623059134, |
| "grad_norm": 3.2645766735076904, |
| "learning_rate": 1.5707884067429471e-07, |
| "logits/chosen": -0.6865817904472351, |
| "logits/rejected": -0.7084690928459167, |
| "logps/chosen": -1.377517819404602, |
| "logps/rejected": -1.7079989910125732, |
| "loss": 1.2371, |
| "rewards/accuracies": 0.640625, |
| "rewards/chosen": -2.755035638809204, |
| "rewards/margins": 0.660962700843811, |
| "rewards/rejected": -3.4159979820251465, |
| "step": 362 |
| }, |
| { |
| "epoch": 0.7674925668979188, |
| "grad_norm": 1.973783254623413, |
| "learning_rate": 1.5439217627423695e-07, |
| "logits/chosen": -0.7317283153533936, |
| "logits/rejected": -0.7571225166320801, |
| "logps/chosen": -1.63040030002594, |
| "logps/rejected": -2.027442216873169, |
| "loss": 1.1614, |
| "rewards/accuracies": 0.671875, |
| "rewards/chosen": -3.26080060005188, |
| "rewards/margins": 0.7940834760665894, |
| "rewards/rejected": -4.054884433746338, |
| "step": 363 |
| }, |
| { |
| "epoch": 0.769606871489924, |
| "grad_norm": 4.545448303222656, |
| "learning_rate": 1.5172448546850163e-07, |
| "logits/chosen": -0.6746503710746765, |
| "logits/rejected": -0.7073549628257751, |
| "logps/chosen": -1.321073055267334, |
| "logps/rejected": -1.6741642951965332, |
| "loss": 1.1609, |
| "rewards/accuracies": 0.6796875, |
| "rewards/chosen": -2.642146110534668, |
| "rewards/margins": 0.7061826586723328, |
| "rewards/rejected": -3.3483285903930664, |
| "step": 364 |
| }, |
| { |
| "epoch": 0.7717211760819292, |
| "grad_norm": 8.678997039794922, |
| "learning_rate": 1.490759147111894e-07, |
| "logits/chosen": -0.6089351773262024, |
| "logits/rejected": -0.6172072291374207, |
| "logps/chosen": -1.6598318815231323, |
| "logps/rejected": -1.9151239395141602, |
| "loss": 1.2762, |
| "rewards/accuracies": 0.6484375, |
| "rewards/chosen": -3.3196637630462646, |
| "rewards/margins": 0.5105838775634766, |
| "rewards/rejected": -3.8302478790283203, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.7738354806739346, |
| "grad_norm": 3.29367733001709, |
| "learning_rate": 1.4644660940672627e-07, |
| "logits/chosen": -0.6255152821540833, |
| "logits/rejected": -0.6178345680236816, |
| "logps/chosen": -1.7635339498519897, |
| "logps/rejected": -2.02409029006958, |
| "loss": 1.4469, |
| "rewards/accuracies": 0.6171875, |
| "rewards/chosen": -3.5270678997039795, |
| "rewards/margins": 0.5211121439933777, |
| "rewards/rejected": -4.04818058013916, |
| "step": 366 |
| }, |
| { |
| "epoch": 0.7759497852659398, |
| "grad_norm": 3.500715732574463, |
| "learning_rate": 1.438367139018796e-07, |
| "logits/chosen": -0.6738446354866028, |
| "logits/rejected": -0.671849250793457, |
| "logps/chosen": -1.603959560394287, |
| "logps/rejected": -2.140998363494873, |
| "loss": 0.9771, |
| "rewards/accuracies": 0.703125, |
| "rewards/chosen": -3.207919120788574, |
| "rewards/margins": 1.0740783214569092, |
| "rewards/rejected": -4.281996726989746, |
| "step": 367 |
| }, |
| { |
| "epoch": 0.7780640898579452, |
| "grad_norm": 2.8842501640319824, |
| "learning_rate": 1.412463714778343e-07, |
| "logits/chosen": -0.6544129252433777, |
| "logits/rejected": -0.6667245030403137, |
| "logps/chosen": -1.7409751415252686, |
| "logps/rejected": -2.1441538333892822, |
| "loss": 1.1043, |
| "rewards/accuracies": 0.7265625, |
| "rewards/chosen": -3.481950283050537, |
| "rewards/margins": 0.806357741355896, |
| "rewards/rejected": -4.2883076667785645, |
| "step": 368 |
| }, |
| { |
| "epoch": 0.7801783944499504, |
| "grad_norm": 3.7606077194213867, |
| "learning_rate": 1.3867572434232728e-07, |
| "logits/chosen": -0.6620441675186157, |
| "logits/rejected": -0.6536539793014526, |
| "logps/chosen": -1.6755543947219849, |
| "logps/rejected": -2.012425184249878, |
| "loss": 1.2249, |
| "rewards/accuracies": 0.6796875, |
| "rewards/chosen": -3.3511087894439697, |
| "rewards/margins": 0.6737421154975891, |
| "rewards/rejected": -4.024850368499756, |
| "step": 369 |
| }, |
| { |
| "epoch": 0.7822926990419558, |
| "grad_norm": 3.284456729888916, |
| "learning_rate": 1.3612491362183887e-07, |
| "logits/chosen": -0.6353476047515869, |
| "logits/rejected": -0.6363587975502014, |
| "logps/chosen": -1.6001538038253784, |
| "logps/rejected": -2.0670526027679443, |
| "loss": 1.0746, |
| "rewards/accuracies": 0.6796875, |
| "rewards/chosen": -3.200307607650757, |
| "rewards/margins": 0.9337971210479736, |
| "rewards/rejected": -4.134105205535889, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.784407003633961, |
| "grad_norm": 1.9063444137573242, |
| "learning_rate": 1.3359407935384642e-07, |
| "logits/chosen": -0.6120063662528992, |
| "logits/rejected": -0.5794797539710999, |
| "logps/chosen": -1.4489734172821045, |
| "logps/rejected": -1.9216854572296143, |
| "loss": 1.0928, |
| "rewards/accuracies": 0.71875, |
| "rewards/chosen": -2.897946834564209, |
| "rewards/margins": 0.9454240798950195, |
| "rewards/rejected": -3.8433709144592285, |
| "step": 371 |
| }, |
| { |
| "epoch": 0.7865213082259663, |
| "grad_norm": 6.973724842071533, |
| "learning_rate": 1.3108336047913633e-07, |
| "logits/chosen": -0.6082984209060669, |
| "logits/rejected": -0.6162828207015991, |
| "logps/chosen": -1.7623229026794434, |
| "logps/rejected": -2.239130735397339, |
| "loss": 1.2665, |
| "rewards/accuracies": 0.59375, |
| "rewards/chosen": -3.5246458053588867, |
| "rewards/margins": 0.9536150693893433, |
| "rewards/rejected": -4.478261470794678, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.7886356128179716, |
| "grad_norm": 3.874128580093384, |
| "learning_rate": 1.2859289483417557e-07, |
| "logits/chosen": -0.5540960431098938, |
| "logits/rejected": -0.5091680884361267, |
| "logps/chosen": -1.85587739944458, |
| "logps/rejected": -2.3959312438964844, |
| "loss": 1.0672, |
| "rewards/accuracies": 0.6796875, |
| "rewards/chosen": -3.71175479888916, |
| "rewards/margins": 1.0801074504852295, |
| "rewards/rejected": -4.791862487792969, |
| "step": 373 |
| }, |
| { |
| "epoch": 0.7907499174099769, |
| "grad_norm": 13.771154403686523, |
| "learning_rate": 1.261228191435445e-07, |
| "logits/chosen": -0.599963903427124, |
| "logits/rejected": -0.5765703916549683, |
| "logps/chosen": -1.7974251508712769, |
| "logps/rejected": -2.2272088527679443, |
| "loss": 1.1994, |
| "rewards/accuracies": 0.640625, |
| "rewards/chosen": -3.5948503017425537, |
| "rewards/margins": 0.8595672249794006, |
| "rewards/rejected": -4.454417705535889, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.7928642220019821, |
| "grad_norm": 2.5084969997406006, |
| "learning_rate": 1.2367326901243214e-07, |
| "logits/chosen": -0.5945304036140442, |
| "logits/rejected": -0.6021737456321716, |
| "logps/chosen": -1.9855573177337646, |
| "logps/rejected": -2.3953022956848145, |
| "loss": 1.2576, |
| "rewards/accuracies": 0.6640625, |
| "rewards/chosen": -3.9711146354675293, |
| "rewards/margins": 0.8194906711578369, |
| "rewards/rejected": -4.790604591369629, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.7949785265939875, |
| "grad_norm": 4.571497440338135, |
| "learning_rate": 1.2124437891918993e-07, |
| "logits/chosen": -0.5888144373893738, |
| "logits/rejected": -0.5575076937675476, |
| "logps/chosen": -1.8334908485412598, |
| "logps/rejected": -2.153212070465088, |
| "loss": 1.2104, |
| "rewards/accuracies": 0.640625, |
| "rewards/chosen": -3.6669816970825195, |
| "rewards/margins": 0.639442503452301, |
| "rewards/rejected": -4.306424140930176, |
| "step": 376 |
| }, |
| { |
| "epoch": 0.7970928311859927, |
| "grad_norm": 5.023235321044922, |
| "learning_rate": 1.1883628220795005e-07, |
| "logits/chosen": -0.632038414478302, |
| "logits/rejected": -0.6368271708488464, |
| "logps/chosen": -1.8573570251464844, |
| "logps/rejected": -2.291320323944092, |
| "loss": 1.1719, |
| "rewards/accuracies": 0.6640625, |
| "rewards/chosen": -3.7147140502929688, |
| "rewards/margins": 0.8679270148277283, |
| "rewards/rejected": -4.582640647888184, |
| "step": 377 |
| }, |
| { |
| "epoch": 0.7992071357779981, |
| "grad_norm": 4.98567533493042, |
| "learning_rate": 1.1644911108130434e-07, |
| "logits/chosen": -0.5647228360176086, |
| "logits/rejected": -0.5541558265686035, |
| "logps/chosen": -1.8232372999191284, |
| "logps/rejected": -2.2992348670959473, |
| "loss": 1.1476, |
| "rewards/accuracies": 0.6953125, |
| "rewards/chosen": -3.646474599838257, |
| "rewards/margins": 0.9519950747489929, |
| "rewards/rejected": -4.5984697341918945, |
| "step": 378 |
| }, |
| { |
| "epoch": 0.8013214403700033, |
| "grad_norm": 9.514540672302246, |
| "learning_rate": 1.1408299659304682e-07, |
| "logits/chosen": -0.5385195016860962, |
| "logits/rejected": -0.5475942492485046, |
| "logps/chosen": -2.077877998352051, |
| "logps/rejected": -2.4877052307128906, |
| "loss": 1.1605, |
| "rewards/accuracies": 0.7265625, |
| "rewards/chosen": -4.155755996704102, |
| "rewards/margins": 0.8196545243263245, |
| "rewards/rejected": -4.975410461425781, |
| "step": 379 |
| }, |
| { |
| "epoch": 0.8034357449620085, |
| "grad_norm": 7.652558326721191, |
| "learning_rate": 1.1173806864097884e-07, |
| "logits/chosen": -0.5651392936706543, |
| "logits/rejected": -0.5097556114196777, |
| "logps/chosen": -1.9452521800994873, |
| "logps/rejected": -2.376047134399414, |
| "loss": 1.2004, |
| "rewards/accuracies": 0.6796875, |
| "rewards/chosen": -3.8905043601989746, |
| "rewards/margins": 0.8615895509719849, |
| "rewards/rejected": -4.752094268798828, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.8055500495540139, |
| "grad_norm": 6.184218406677246, |
| "learning_rate": 1.0941445595977766e-07, |
| "logits/chosen": -0.5738644599914551, |
| "logits/rejected": -0.570101797580719, |
| "logps/chosen": -2.0233359336853027, |
| "logps/rejected": -2.5829384326934814, |
| "loss": 1.1539, |
| "rewards/accuracies": 0.71875, |
| "rewards/chosen": -4.0466718673706055, |
| "rewards/margins": 1.1192048788070679, |
| "rewards/rejected": -5.165876865386963, |
| "step": 381 |
| }, |
| { |
| "epoch": 0.8076643541460191, |
| "grad_norm": 4.697547435760498, |
| "learning_rate": 1.0711228611392936e-07, |
| "logits/chosen": -0.5766915082931519, |
| "logits/rejected": -0.5619411468505859, |
| "logps/chosen": -2.0546395778656006, |
| "logps/rejected": -2.4459054470062256, |
| "loss": 1.2723, |
| "rewards/accuracies": 0.59375, |
| "rewards/chosen": -4.109279155731201, |
| "rewards/margins": 0.7825320959091187, |
| "rewards/rejected": -4.891810894012451, |
| "step": 382 |
| }, |
| { |
| "epoch": 0.8097786587380245, |
| "grad_norm": 5.595128536224365, |
| "learning_rate": 1.0483168549072518e-07, |
| "logits/chosen": -0.6808648109436035, |
| "logits/rejected": -0.6518751382827759, |
| "logps/chosen": -1.9909974336624146, |
| "logps/rejected": -2.3775596618652344, |
| "loss": 1.2501, |
| "rewards/accuracies": 0.6640625, |
| "rewards/chosen": -3.981994867324829, |
| "rewards/margins": 0.7731241583824158, |
| "rewards/rejected": -4.755119323730469, |
| "step": 383 |
| }, |
| { |
| "epoch": 0.8118929633300297, |
| "grad_norm": 3.6460607051849365, |
| "learning_rate": 1.0257277929332331e-07, |
| "logits/chosen": -0.6901826858520508, |
| "logits/rejected": -0.703309953212738, |
| "logps/chosen": -1.9317903518676758, |
| "logps/rejected": -2.322279930114746, |
| "loss": 1.1945, |
| "rewards/accuracies": 0.65625, |
| "rewards/chosen": -3.8635807037353516, |
| "rewards/margins": 0.780979335308075, |
| "rewards/rejected": -4.644559860229492, |
| "step": 384 |
| }, |
| { |
| "epoch": 0.814007267922035, |
| "grad_norm": 8.366463661193848, |
| "learning_rate": 1.0033569153387561e-07, |
| "logits/chosen": -0.5720599293708801, |
| "logits/rejected": -0.5666248798370361, |
| "logps/chosen": -1.9946173429489136, |
| "logps/rejected": -2.3951826095581055, |
| "loss": 1.3349, |
| "rewards/accuracies": 0.546875, |
| "rewards/chosen": -3.989234685897827, |
| "rewards/margins": 0.8011305332183838, |
| "rewards/rejected": -4.790365219116211, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.8161215725140403, |
| "grad_norm": 1.8578377962112427, |
| "learning_rate": 9.812054502671834e-08, |
| "logits/chosen": -0.6122175455093384, |
| "logits/rejected": -0.5665942430496216, |
| "logps/chosen": -2.1414878368377686, |
| "logps/rejected": -2.646432399749756, |
| "loss": 1.1834, |
| "rewards/accuracies": 0.7109375, |
| "rewards/chosen": -4.282975673675537, |
| "rewards/margins": 1.009889006614685, |
| "rewards/rejected": -5.292864799499512, |
| "step": 386 |
| }, |
| { |
| "epoch": 0.8182358771060456, |
| "grad_norm": 4.7323408126831055, |
| "learning_rate": 9.592746138163093e-08, |
| "logits/chosen": -0.5390607118606567, |
| "logits/rejected": -0.5227072834968567, |
| "logps/chosen": -2.1249067783355713, |
| "logps/rejected": -2.688115119934082, |
| "loss": 1.2211, |
| "rewards/accuracies": 0.671875, |
| "rewards/chosen": -4.249813556671143, |
| "rewards/margins": 1.1264164447784424, |
| "rewards/rejected": -5.376230239868164, |
| "step": 387 |
| }, |
| { |
| "epoch": 0.8203501816980509, |
| "grad_norm": 2.5557284355163574, |
| "learning_rate": 9.375656099715934e-08, |
| "logits/chosen": -0.5654515027999878, |
| "logits/rejected": -0.5636597275733948, |
| "logps/chosen": -2.126107692718506, |
| "logps/rejected": -2.606684684753418, |
| "loss": 1.1995, |
| "rewards/accuracies": 0.6484375, |
| "rewards/chosen": -4.252215385437012, |
| "rewards/margins": 0.9611539244651794, |
| "rewards/rejected": -5.213369369506836, |
| "step": 388 |
| }, |
| { |
| "epoch": 0.8224644862900562, |
| "grad_norm": 4.177574634552002, |
| "learning_rate": 9.160796305400609e-08, |
| "logits/chosen": -0.6432445645332336, |
| "logits/rejected": -0.6587055921554565, |
| "logps/chosen": -2.0785441398620605, |
| "logps/rejected": -2.4507219791412354, |
| "loss": 1.2339, |
| "rewards/accuracies": 0.6328125, |
| "rewards/chosen": -4.157088279724121, |
| "rewards/margins": 0.7443561553955078, |
| "rewards/rejected": -4.901443958282471, |
| "step": 389 |
| }, |
| { |
| "epoch": 0.8245787908820614, |
| "grad_norm": 5.901131629943848, |
| "learning_rate": 8.9481785508487e-08, |
| "logits/chosen": -0.588135302066803, |
| "logits/rejected": -0.5850880742073059, |
| "logps/chosen": -2.225466251373291, |
| "logps/rejected": -2.638160467147827, |
| "loss": 1.2255, |
| "rewards/accuracies": 0.6328125, |
| "rewards/chosen": -4.450932502746582, |
| "rewards/margins": 0.8253881335258484, |
| "rewards/rejected": -5.276320934295654, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.8266930954740668, |
| "grad_norm": 2.727555751800537, |
| "learning_rate": 8.737814508605673e-08, |
| "logits/chosen": -0.5863823294639587, |
| "logits/rejected": -0.590294599533081, |
| "logps/chosen": -1.9851064682006836, |
| "logps/rejected": -2.579831600189209, |
| "loss": 1.0113, |
| "rewards/accuracies": 0.6953125, |
| "rewards/chosen": -3.970212936401367, |
| "rewards/margins": 1.1894500255584717, |
| "rewards/rejected": -5.159663200378418, |
| "step": 391 |
| }, |
| { |
| "epoch": 0.828807400066072, |
| "grad_norm": 9.048048973083496, |
| "learning_rate": 8.529715727489912e-08, |
| "logits/chosen": -0.5600543022155762, |
| "logits/rejected": -0.5537065267562866, |
| "logps/chosen": -1.9846975803375244, |
| "logps/rejected": -2.2676990032196045, |
| "loss": 1.3045, |
| "rewards/accuracies": 0.6328125, |
| "rewards/chosen": -3.969395160675049, |
| "rewards/margins": 0.5660032629966736, |
| "rewards/rejected": -4.535398006439209, |
| "step": 392 |
| }, |
| { |
| "epoch": 0.8309217046580774, |
| "grad_norm": 3.4390201568603516, |
| "learning_rate": 8.323893631958806e-08, |
| "logits/chosen": -0.6335893273353577, |
| "logits/rejected": -0.6190727949142456, |
| "logps/chosen": -1.908363938331604, |
| "logps/rejected": -2.510305166244507, |
| "loss": 1.0262, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -3.816727876663208, |
| "rewards/margins": 1.2038825750350952, |
| "rewards/rejected": -5.020610332489014, |
| "step": 393 |
| }, |
| { |
| "epoch": 0.8330360092500826, |
| "grad_norm": 5.347372531890869, |
| "learning_rate": 8.120359521481501e-08, |
| "logits/chosen": -0.6408874988555908, |
| "logits/rejected": -0.643690288066864, |
| "logps/chosen": -2.019606828689575, |
| "logps/rejected": -2.3068103790283203, |
| "loss": 1.3028, |
| "rewards/accuracies": 0.640625, |
| "rewards/chosen": -4.03921365737915, |
| "rewards/margins": 0.574406623840332, |
| "rewards/rejected": -4.613620758056641, |
| "step": 394 |
| }, |
| { |
| "epoch": 0.8351503138420878, |
| "grad_norm": 2.2186920642852783, |
| "learning_rate": 7.9191245699186e-08, |
| "logits/chosen": -0.7156819105148315, |
| "logits/rejected": -0.6814436316490173, |
| "logps/chosen": -2.108549118041992, |
| "logps/rejected": -2.608646869659424, |
| "loss": 1.2948, |
| "rewards/accuracies": 0.6328125, |
| "rewards/chosen": -4.217098236083984, |
| "rewards/margins": 1.0001959800720215, |
| "rewards/rejected": -5.217293739318848, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.8372646184340932, |
| "grad_norm": 2.6448726654052734, |
| "learning_rate": 7.720199824908691e-08, |
| "logits/chosen": -0.5753149390220642, |
| "logits/rejected": -0.6065633296966553, |
| "logps/chosen": -2.2337100505828857, |
| "logps/rejected": -2.6677160263061523, |
| "loss": 1.2273, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -4.4674201011657715, |
| "rewards/margins": 0.868012011051178, |
| "rewards/rejected": -5.335432052612305, |
| "step": 396 |
| }, |
| { |
| "epoch": 0.8393789230260984, |
| "grad_norm": 6.596648216247559, |
| "learning_rate": 7.523596207261907e-08, |
| "logits/chosen": -0.5432775616645813, |
| "logits/rejected": -0.4928567409515381, |
| "logps/chosen": -2.1113924980163574, |
| "logps/rejected": -2.482846975326538, |
| "loss": 1.319, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -4.222784996032715, |
| "rewards/margins": 0.7429092526435852, |
| "rewards/rejected": -4.965693950653076, |
| "step": 397 |
| }, |
| { |
| "epoch": 0.8414932276181037, |
| "grad_norm": 3.9646811485290527, |
| "learning_rate": 7.329324510360269e-08, |
| "logits/chosen": -0.5816119909286499, |
| "logits/rejected": -0.564030110836029, |
| "logps/chosen": -2.0296411514282227, |
| "logps/rejected": -2.5152456760406494, |
| "loss": 1.1645, |
| "rewards/accuracies": 0.703125, |
| "rewards/chosen": -4.059282302856445, |
| "rewards/margins": 0.9712092876434326, |
| "rewards/rejected": -5.030491352081299, |
| "step": 398 |
| }, |
| { |
| "epoch": 0.843607532210109, |
| "grad_norm": 2.7787463665008545, |
| "learning_rate": 7.137395399565249e-08, |
| "logits/chosen": -0.6342184543609619, |
| "logits/rejected": -0.6318203210830688, |
| "logps/chosen": -2.0209803581237793, |
| "logps/rejected": -2.5250658988952637, |
| "loss": 1.1822, |
| "rewards/accuracies": 0.671875, |
| "rewards/chosen": -4.041960716247559, |
| "rewards/margins": 1.0081708431243896, |
| "rewards/rejected": -5.050131797790527, |
| "step": 399 |
| }, |
| { |
| "epoch": 0.8457218368021143, |
| "grad_norm": 4.476524353027344, |
| "learning_rate": 6.947819411632222e-08, |
| "logits/chosen": -0.5809480547904968, |
| "logits/rejected": -0.5740150213241577, |
| "logps/chosen": -1.9072691202163696, |
| "logps/rejected": -2.2942898273468018, |
| "loss": 1.3214, |
| "rewards/accuracies": 0.6640625, |
| "rewards/chosen": -3.8145382404327393, |
| "rewards/margins": 0.7740417718887329, |
| "rewards/rejected": -4.5885796546936035, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.8478361413941196, |
| "grad_norm": 2.47866153717041, |
| "learning_rate": 6.760606954131965e-08, |
| "logits/chosen": -0.5540263652801514, |
| "logits/rejected": -0.5378059148788452, |
| "logps/chosen": -1.8337305784225464, |
| "logps/rejected": -2.264974594116211, |
| "loss": 1.2396, |
| "rewards/accuracies": 0.671875, |
| "rewards/chosen": -3.6674611568450928, |
| "rewards/margins": 0.8624882698059082, |
| "rewards/rejected": -4.529949188232422, |
| "step": 401 |
| }, |
| { |
| "epoch": 0.8499504459861249, |
| "grad_norm": 2.800645112991333, |
| "learning_rate": 6.575768304879292e-08, |
| "logits/chosen": -0.6384072303771973, |
| "logits/rejected": -0.6310533285140991, |
| "logps/chosen": -1.9723026752471924, |
| "logps/rejected": -2.3342039585113525, |
| "loss": 1.2746, |
| "rewards/accuracies": 0.6171875, |
| "rewards/chosen": -3.9446053504943848, |
| "rewards/margins": 0.7238021492958069, |
| "rewards/rejected": -4.668407917022705, |
| "step": 402 |
| }, |
| { |
| "epoch": 0.8520647505781301, |
| "grad_norm": 2.794485092163086, |
| "learning_rate": 6.3933136113689e-08, |
| "logits/chosen": -0.7269207239151001, |
| "logits/rejected": -0.7003817558288574, |
| "logps/chosen": -1.8535553216934204, |
| "logps/rejected": -2.2630820274353027, |
| "loss": 1.1774, |
| "rewards/accuracies": 0.765625, |
| "rewards/chosen": -3.707110643386841, |
| "rewards/margins": 0.8190534114837646, |
| "rewards/rejected": -4.5261640548706055, |
| "step": 403 |
| }, |
| { |
| "epoch": 0.8541790551701355, |
| "grad_norm": 12.197257041931152, |
| "learning_rate": 6.213252890218162e-08, |
| "logits/chosen": -0.5296715497970581, |
| "logits/rejected": -0.5422269105911255, |
| "logps/chosen": -1.8217012882232666, |
| "logps/rejected": -2.3873071670532227, |
| "loss": 1.1467, |
| "rewards/accuracies": 0.6328125, |
| "rewards/chosen": -3.643402576446533, |
| "rewards/margins": 1.1312119960784912, |
| "rewards/rejected": -4.774614334106445, |
| "step": 404 |
| }, |
| { |
| "epoch": 0.8562933597621407, |
| "grad_norm": 2.396972179412842, |
| "learning_rate": 6.03559602661729e-08, |
| "logits/chosen": -0.6527739763259888, |
| "logits/rejected": -0.645389199256897, |
| "logps/chosen": -1.9720454216003418, |
| "logps/rejected": -2.2900233268737793, |
| "loss": 1.3423, |
| "rewards/accuracies": 0.671875, |
| "rewards/chosen": -3.9440908432006836, |
| "rewards/margins": 0.6359554529190063, |
| "rewards/rejected": -4.580046653747559, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.8584076643541461, |
| "grad_norm": 3.5759809017181396, |
| "learning_rate": 5.8603527737866307e-08, |
| "logits/chosen": -0.5955278277397156, |
| "logits/rejected": -0.583007276058197, |
| "logps/chosen": -1.835761547088623, |
| "logps/rejected": -2.2889809608459473, |
| "loss": 1.1015, |
| "rewards/accuracies": 0.6484375, |
| "rewards/chosen": -3.671523094177246, |
| "rewards/margins": 0.9064393639564514, |
| "rewards/rejected": -4.5779619216918945, |
| "step": 406 |
| }, |
| { |
| "epoch": 0.8605219689461513, |
| "grad_norm": 8.514383316040039, |
| "learning_rate": 5.687532752441232e-08, |
| "logits/chosen": -0.6325979828834534, |
| "logits/rejected": -0.5895124077796936, |
| "logps/chosen": -2.0668628215789795, |
| "logps/rejected": -2.4919605255126953, |
| "loss": 1.2469, |
| "rewards/accuracies": 0.640625, |
| "rewards/chosen": -4.133725643157959, |
| "rewards/margins": 0.8501947522163391, |
| "rewards/rejected": -4.983921051025391, |
| "step": 407 |
| }, |
| { |
| "epoch": 0.8626362735381566, |
| "grad_norm": 2.7234861850738525, |
| "learning_rate": 5.517145450262639e-08, |
| "logits/chosen": -0.5355826616287231, |
| "logits/rejected": -0.5421631932258606, |
| "logps/chosen": -1.8649351596832275, |
| "logps/rejected": -2.5664312839508057, |
| "loss": 1.0119, |
| "rewards/accuracies": 0.7265625, |
| "rewards/chosen": -3.729870319366455, |
| "rewards/margins": 1.4029927253723145, |
| "rewards/rejected": -5.132862567901611, |
| "step": 408 |
| }, |
| { |
| "epoch": 0.8647505781301619, |
| "grad_norm": 3.1693661212921143, |
| "learning_rate": 5.3492002213780754e-08, |
| "logits/chosen": -0.5687247514724731, |
| "logits/rejected": -0.5579267740249634, |
| "logps/chosen": -2.0369410514831543, |
| "logps/rejected": -2.4640278816223145, |
| "loss": 1.311, |
| "rewards/accuracies": 0.640625, |
| "rewards/chosen": -4.073882102966309, |
| "rewards/margins": 0.8541740775108337, |
| "rewards/rejected": -4.928055763244629, |
| "step": 409 |
| }, |
| { |
| "epoch": 0.8668648827221671, |
| "grad_norm": 1.8922606706619263, |
| "learning_rate": 5.183706285846873e-08, |
| "logits/chosen": -0.6247987151145935, |
| "logits/rejected": -0.6043509244918823, |
| "logps/chosen": -1.8121845722198486, |
| "logps/rejected": -2.2492425441741943, |
| "loss": 1.1291, |
| "rewards/accuracies": 0.671875, |
| "rewards/chosen": -3.6243691444396973, |
| "rewards/margins": 0.8741158843040466, |
| "rewards/rejected": -4.498485088348389, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.8689791873141725, |
| "grad_norm": 5.305470943450928, |
| "learning_rate": 5.020672729154307e-08, |
| "logits/chosen": -0.5554785132408142, |
| "logits/rejected": -0.565819501876831, |
| "logps/chosen": -1.9100950956344604, |
| "logps/rejected": -2.4060237407684326, |
| "loss": 1.1576, |
| "rewards/accuracies": 0.640625, |
| "rewards/chosen": -3.820190191268921, |
| "rewards/margins": 0.9918570518493652, |
| "rewards/rejected": -4.812047481536865, |
| "step": 411 |
| }, |
| { |
| "epoch": 0.8710934919061777, |
| "grad_norm": 3.2367563247680664, |
| "learning_rate": 4.860108501712823e-08, |
| "logits/chosen": -0.6536320447921753, |
| "logits/rejected": -0.6901589035987854, |
| "logps/chosen": -1.9213619232177734, |
| "logps/rejected": -2.270475387573242, |
| "loss": 1.2711, |
| "rewards/accuracies": 0.703125, |
| "rewards/chosen": -3.842723846435547, |
| "rewards/margins": 0.6982269287109375, |
| "rewards/rejected": -4.540950775146484, |
| "step": 412 |
| }, |
| { |
| "epoch": 0.873207796498183, |
| "grad_norm": 4.2919135093688965, |
| "learning_rate": 4.7020224183706715e-08, |
| "logits/chosen": -0.7220910787582397, |
| "logits/rejected": -0.7015137672424316, |
| "logps/chosen": -1.7745577096939087, |
| "logps/rejected": -2.2485008239746094, |
| "loss": 1.0773, |
| "rewards/accuracies": 0.7421875, |
| "rewards/chosen": -3.5491154193878174, |
| "rewards/margins": 0.9478861093521118, |
| "rewards/rejected": -4.497001647949219, |
| "step": 413 |
| }, |
| { |
| "epoch": 0.8753221010901883, |
| "grad_norm": 6.373754501342773, |
| "learning_rate": 4.54642315792792e-08, |
| "logits/chosen": -0.6177189946174622, |
| "logits/rejected": -0.6176800727844238, |
| "logps/chosen": -1.8406522274017334, |
| "logps/rejected": -2.368619441986084, |
| "loss": 1.1582, |
| "rewards/accuracies": 0.6640625, |
| "rewards/chosen": -3.681304454803467, |
| "rewards/margins": 1.0559337139129639, |
| "rewards/rejected": -4.737238883972168, |
| "step": 414 |
| }, |
| { |
| "epoch": 0.8774364056821936, |
| "grad_norm": 4.120994567871094, |
| "learning_rate": 4.3933192626600725e-08, |
| "logits/chosen": -0.5981518626213074, |
| "logits/rejected": -0.5846447348594666, |
| "logps/chosen": -1.9437062740325928, |
| "logps/rejected": -2.4175901412963867, |
| "loss": 1.1865, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -3.8874125480651855, |
| "rewards/margins": 0.9477680921554565, |
| "rewards/rejected": -4.835180282592773, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.8795507102741988, |
| "grad_norm": 3.618441104888916, |
| "learning_rate": 4.242719137849077e-08, |
| "logits/chosen": -0.544365644454956, |
| "logits/rejected": -0.5385901927947998, |
| "logps/chosen": -1.8662028312683105, |
| "logps/rejected": -2.2550435066223145, |
| "loss": 1.2125, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -3.732405662536621, |
| "rewards/margins": 0.7776816487312317, |
| "rewards/rejected": -4.510087013244629, |
| "step": 416 |
| }, |
| { |
| "epoch": 0.8816650148662042, |
| "grad_norm": 8.518675804138184, |
| "learning_rate": 4.0946310513218726e-08, |
| "logits/chosen": -0.6048115491867065, |
| "logits/rejected": -0.5681714415550232, |
| "logps/chosen": -2.020745038986206, |
| "logps/rejected": -2.5642106533050537, |
| "loss": 1.1682, |
| "rewards/accuracies": 0.7109375, |
| "rewards/chosen": -4.041490077972412, |
| "rewards/margins": 1.0869308710098267, |
| "rewards/rejected": -5.128421306610107, |
| "step": 417 |
| }, |
| { |
| "epoch": 0.8837793194582094, |
| "grad_norm": 4.693824768066406, |
| "learning_rate": 3.9490631329964554e-08, |
| "logits/chosen": -0.5653468370437622, |
| "logits/rejected": -0.5610933303833008, |
| "logps/chosen": -1.8477216958999634, |
| "logps/rejected": -2.280613660812378, |
| "loss": 1.2177, |
| "rewards/accuracies": 0.640625, |
| "rewards/chosen": -3.6954433917999268, |
| "rewards/margins": 0.8657836318016052, |
| "rewards/rejected": -4.561227321624756, |
| "step": 418 |
| }, |
| { |
| "epoch": 0.8858936240502148, |
| "grad_norm": 4.910251617431641, |
| "learning_rate": 3.806023374435663e-08, |
| "logits/chosen": -0.6456243991851807, |
| "logits/rejected": -0.6571968197822571, |
| "logps/chosen": -1.8414027690887451, |
| "logps/rejected": -2.2380261421203613, |
| "loss": 1.2081, |
| "rewards/accuracies": 0.6328125, |
| "rewards/chosen": -3.6828055381774902, |
| "rewards/margins": 0.7932465076446533, |
| "rewards/rejected": -4.476052284240723, |
| "step": 419 |
| }, |
| { |
| "epoch": 0.88800792864222, |
| "grad_norm": 2.260300636291504, |
| "learning_rate": 3.665519628408331e-08, |
| "logits/chosen": -0.6023683547973633, |
| "logits/rejected": -0.6400430798530579, |
| "logps/chosen": -2.039283275604248, |
| "logps/rejected": -2.520536184310913, |
| "loss": 1.1629, |
| "rewards/accuracies": 0.671875, |
| "rewards/chosen": -4.078566551208496, |
| "rewards/margins": 0.962505042552948, |
| "rewards/rejected": -5.041072368621826, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.8901222332342253, |
| "grad_norm": 2.411315679550171, |
| "learning_rate": 3.527559608458225e-08, |
| "logits/chosen": -0.6408150792121887, |
| "logits/rejected": -0.6065229177474976, |
| "logps/chosen": -1.91830313205719, |
| "logps/rejected": -2.378871440887451, |
| "loss": 1.1848, |
| "rewards/accuracies": 0.6796875, |
| "rewards/chosen": -3.83660626411438, |
| "rewards/margins": 0.9211370944976807, |
| "rewards/rejected": -4.757742881774902, |
| "step": 421 |
| }, |
| { |
| "epoch": 0.8922365378262306, |
| "grad_norm": 8.43724250793457, |
| "learning_rate": 3.39215088848061e-08, |
| "logits/chosen": -0.5962439179420471, |
| "logits/rejected": -0.5975909233093262, |
| "logps/chosen": -1.9837861061096191, |
| "logps/rejected": -2.319769859313965, |
| "loss": 1.2026, |
| "rewards/accuracies": 0.6953125, |
| "rewards/chosen": -3.9675722122192383, |
| "rewards/margins": 0.6719677448272705, |
| "rewards/rejected": -4.63953971862793, |
| "step": 422 |
| }, |
| { |
| "epoch": 0.8943508424182359, |
| "grad_norm": 2.261178731918335, |
| "learning_rate": 3.259300902306367e-08, |
| "logits/chosen": -0.6858331561088562, |
| "logits/rejected": -0.7034648060798645, |
| "logps/chosen": -1.8496602773666382, |
| "logps/rejected": -2.3583877086639404, |
| "loss": 1.1137, |
| "rewards/accuracies": 0.734375, |
| "rewards/chosen": -3.6993205547332764, |
| "rewards/margins": 1.0174546241760254, |
| "rewards/rejected": -4.716775417327881, |
| "step": 423 |
| }, |
| { |
| "epoch": 0.8964651470102412, |
| "grad_norm": 7.621473789215088, |
| "learning_rate": 3.129016943293955e-08, |
| "logits/chosen": -0.6037753224372864, |
| "logits/rejected": -0.5865834355354309, |
| "logps/chosen": -1.902024507522583, |
| "logps/rejected": -2.3152518272399902, |
| "loss": 1.2577, |
| "rewards/accuracies": 0.6328125, |
| "rewards/chosen": -3.804049015045166, |
| "rewards/margins": 0.8264546394348145, |
| "rewards/rejected": -4.6305036544799805, |
| "step": 424 |
| }, |
| { |
| "epoch": 0.8985794516022465, |
| "grad_norm": 2.954953908920288, |
| "learning_rate": 3.001306163928985e-08, |
| "logits/chosen": -0.6682695746421814, |
| "logits/rejected": -0.6516857147216797, |
| "logps/chosen": -2.0923025608062744, |
| "logps/rejected": -2.4602210521698, |
| "loss": 1.3758, |
| "rewards/accuracies": 0.5859375, |
| "rewards/chosen": -4.184605121612549, |
| "rewards/margins": 0.7358372211456299, |
| "rewards/rejected": -4.9204421043396, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.9006937561942517, |
| "grad_norm": 4.746059894561768, |
| "learning_rate": 2.8761755754315663e-08, |
| "logits/chosen": -0.6213058829307556, |
| "logits/rejected": -0.6071665287017822, |
| "logps/chosen": -1.9309402704238892, |
| "logps/rejected": -2.3048858642578125, |
| "loss": 1.2216, |
| "rewards/accuracies": 0.6328125, |
| "rewards/chosen": -3.8618805408477783, |
| "rewards/margins": 0.7478916049003601, |
| "rewards/rejected": -4.609771728515625, |
| "step": 426 |
| }, |
| { |
| "epoch": 0.902808060786257, |
| "grad_norm": 3.4567902088165283, |
| "learning_rate": 2.753632047371335e-08, |
| "logits/chosen": -0.5602300763130188, |
| "logits/rejected": -0.5994393825531006, |
| "logps/chosen": -2.0382192134857178, |
| "logps/rejected": -2.4620015621185303, |
| "loss": 1.1534, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -4.0764384269714355, |
| "rewards/margins": 0.8475649952888489, |
| "rewards/rejected": -4.9240031242370605, |
| "step": 427 |
| }, |
| { |
| "epoch": 0.9049223653782623, |
| "grad_norm": 8.650147438049316, |
| "learning_rate": 2.63368230729043e-08, |
| "logits/chosen": -0.6574521660804749, |
| "logits/rejected": -0.6474560499191284, |
| "logps/chosen": -2.01283860206604, |
| "logps/rejected": -2.3451762199401855, |
| "loss": 1.3337, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -4.02567720413208, |
| "rewards/margins": 0.6646751165390015, |
| "rewards/rejected": -4.690352439880371, |
| "step": 428 |
| }, |
| { |
| "epoch": 0.9070366699702676, |
| "grad_norm": 4.965768337249756, |
| "learning_rate": 2.5163329403340593e-08, |
| "logits/chosen": -0.632398784160614, |
| "logits/rejected": -0.6226595640182495, |
| "logps/chosen": -1.9954252243041992, |
| "logps/rejected": -2.415121555328369, |
| "loss": 1.1249, |
| "rewards/accuracies": 0.6484375, |
| "rewards/chosen": -3.9908504486083984, |
| "rewards/margins": 0.8393926024436951, |
| "rewards/rejected": -4.830243110656738, |
| "step": 429 |
| }, |
| { |
| "epoch": 0.9091509745622729, |
| "grad_norm": 4.165818214416504, |
| "learning_rate": 2.4015903888890242e-08, |
| "logits/chosen": -0.6372086405754089, |
| "logits/rejected": -0.6573516130447388, |
| "logps/chosen": -1.9238042831420898, |
| "logps/rejected": -2.3672964572906494, |
| "loss": 1.1372, |
| "rewards/accuracies": 0.65625, |
| "rewards/chosen": -3.8476085662841797, |
| "rewards/margins": 0.8869843482971191, |
| "rewards/rejected": -4.734592914581299, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.9112652791542781, |
| "grad_norm": 4.025818347930908, |
| "learning_rate": 2.289460952230038e-08, |
| "logits/chosen": -0.6017577648162842, |
| "logits/rejected": -0.5835919380187988, |
| "logps/chosen": -1.9263951778411865, |
| "logps/rejected": -2.364337921142578, |
| "loss": 1.1519, |
| "rewards/accuracies": 0.7265625, |
| "rewards/chosen": -3.852790355682373, |
| "rewards/margins": 0.875885009765625, |
| "rewards/rejected": -4.728675842285156, |
| "step": 431 |
| }, |
| { |
| "epoch": 0.9133795837462835, |
| "grad_norm": 2.232624053955078, |
| "learning_rate": 2.1799507861738788e-08, |
| "logits/chosen": -0.697775661945343, |
| "logits/rejected": -0.7254015803337097, |
| "logps/chosen": -1.8258415460586548, |
| "logps/rejected": -2.089477777481079, |
| "loss": 1.3136, |
| "rewards/accuracies": 0.578125, |
| "rewards/chosen": -3.6516830921173096, |
| "rewards/margins": 0.5272722244262695, |
| "rewards/rejected": -4.178955554962158, |
| "step": 432 |
| }, |
| { |
| "epoch": 0.9154938883382887, |
| "grad_norm": 5.815128326416016, |
| "learning_rate": 2.073065902741472e-08, |
| "logits/chosen": -0.5873744487762451, |
| "logits/rejected": -0.5638723969459534, |
| "logps/chosen": -1.9891620874404907, |
| "logps/rejected": -2.4962096214294434, |
| "loss": 1.1379, |
| "rewards/accuracies": 0.6953125, |
| "rewards/chosen": -3.9783241748809814, |
| "rewards/margins": 1.0140951871871948, |
| "rewards/rejected": -4.992419242858887, |
| "step": 433 |
| }, |
| { |
| "epoch": 0.917608192930294, |
| "grad_norm": 5.057411193847656, |
| "learning_rate": 1.9688121698277993e-08, |
| "logits/chosen": -0.607324481010437, |
| "logits/rejected": -0.5964059829711914, |
| "logps/chosen": -1.8643240928649902, |
| "logps/rejected": -2.2751855850219727, |
| "loss": 1.2388, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -3.7286481857299805, |
| "rewards/margins": 0.8217229843139648, |
| "rewards/rejected": -4.550371170043945, |
| "step": 434 |
| }, |
| { |
| "epoch": 0.9197224975222993, |
| "grad_norm": 2.25390362739563, |
| "learning_rate": 1.8671953108797823e-08, |
| "logits/chosen": -0.6268022656440735, |
| "logits/rejected": -0.6332954168319702, |
| "logps/chosen": -1.945924997329712, |
| "logps/rejected": -2.330981731414795, |
| "loss": 1.1455, |
| "rewards/accuracies": 0.71875, |
| "rewards/chosen": -3.891849994659424, |
| "rewards/margins": 0.7701136469841003, |
| "rewards/rejected": -4.66196346282959, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.9218368021143046, |
| "grad_norm": 3.9572856426239014, |
| "learning_rate": 1.7682209045820684e-08, |
| "logits/chosen": -0.6522207856178284, |
| "logits/rejected": -0.6930267810821533, |
| "logps/chosen": -1.8152984380722046, |
| "logps/rejected": -2.0800223350524902, |
| "loss": 1.2978, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -3.630596876144409, |
| "rewards/margins": 0.5294479727745056, |
| "rewards/rejected": -4.1600446701049805, |
| "step": 436 |
| }, |
| { |
| "epoch": 0.9239511067063099, |
| "grad_norm": 1.733438491821289, |
| "learning_rate": 1.671894384550743e-08, |
| "logits/chosen": -0.5977643728256226, |
| "logits/rejected": -0.5842040777206421, |
| "logps/chosen": -1.8794972896575928, |
| "logps/rejected": -2.413329601287842, |
| "loss": 1.0233, |
| "rewards/accuracies": 0.71875, |
| "rewards/chosen": -3.7589945793151855, |
| "rewards/margins": 1.0676649808883667, |
| "rewards/rejected": -4.826659202575684, |
| "step": 437 |
| }, |
| { |
| "epoch": 0.9260654112983152, |
| "grad_norm": 2.8760743141174316, |
| "learning_rate": 1.5782210390350713e-08, |
| "logits/chosen": -0.5813508033752441, |
| "logits/rejected": -0.5602753758430481, |
| "logps/chosen": -1.7892794609069824, |
| "logps/rejected": -2.32309627532959, |
| "loss": 1.0836, |
| "rewards/accuracies": 0.71875, |
| "rewards/chosen": -3.578558921813965, |
| "rewards/margins": 1.0676335096359253, |
| "rewards/rejected": -4.64619255065918, |
| "step": 438 |
| }, |
| { |
| "epoch": 0.9281797158903204, |
| "grad_norm": 5.760490894317627, |
| "learning_rate": 1.4872060106271179e-08, |
| "logits/chosen": -0.5673117637634277, |
| "logits/rejected": -0.5580011606216431, |
| "logps/chosen": -1.943117618560791, |
| "logps/rejected": -2.4581894874572754, |
| "loss": 1.1229, |
| "rewards/accuracies": 0.71875, |
| "rewards/chosen": -3.886235237121582, |
| "rewards/margins": 1.030144453048706, |
| "rewards/rejected": -4.916378974914551, |
| "step": 439 |
| }, |
| { |
| "epoch": 0.9302940204823258, |
| "grad_norm": 5.213393211364746, |
| "learning_rate": 1.3988542959794625e-08, |
| "logits/chosen": -0.5715171098709106, |
| "logits/rejected": -0.5791775584220886, |
| "logps/chosen": -1.961305022239685, |
| "logps/rejected": -2.4485957622528076, |
| "loss": 1.0877, |
| "rewards/accuracies": 0.7265625, |
| "rewards/chosen": -3.92261004447937, |
| "rewards/margins": 0.9745810627937317, |
| "rewards/rejected": -4.897191524505615, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.932408325074331, |
| "grad_norm": 2.670029878616333, |
| "learning_rate": 1.3131707455309004e-08, |
| "logits/chosen": -0.6612206101417542, |
| "logits/rejected": -0.569149374961853, |
| "logps/chosen": -1.9947882890701294, |
| "logps/rejected": -2.41544771194458, |
| "loss": 1.2501, |
| "rewards/accuracies": 0.65625, |
| "rewards/chosen": -3.989576578140259, |
| "rewards/margins": 0.8413184881210327, |
| "rewards/rejected": -4.83089542388916, |
| "step": 441 |
| }, |
| { |
| "epoch": 0.9345226296663363, |
| "grad_norm": 2.0773093700408936, |
| "learning_rate": 1.230160063240121e-08, |
| "logits/chosen": -0.5475001335144043, |
| "logits/rejected": -0.6024526953697205, |
| "logps/chosen": -1.9972546100616455, |
| "logps/rejected": -2.2212231159210205, |
| "loss": 1.2857, |
| "rewards/accuracies": 0.6796875, |
| "rewards/chosen": -3.994509220123291, |
| "rewards/margins": 0.4479368031024933, |
| "rewards/rejected": -4.442446231842041, |
| "step": 442 |
| }, |
| { |
| "epoch": 0.9366369342583416, |
| "grad_norm": 2.6185569763183594, |
| "learning_rate": 1.1498268063274697e-08, |
| "logits/chosen": -0.6600778102874756, |
| "logits/rejected": -0.6794160604476929, |
| "logps/chosen": -1.7303975820541382, |
| "logps/rejected": -2.0589568614959717, |
| "loss": 1.183, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -3.4607951641082764, |
| "rewards/margins": 0.6571190357208252, |
| "rewards/rejected": -4.117913722991943, |
| "step": 443 |
| }, |
| { |
| "epoch": 0.9387512388503468, |
| "grad_norm": 2.7385923862457275, |
| "learning_rate": 1.0721753850247984e-08, |
| "logits/chosen": -0.6136504411697388, |
| "logits/rejected": -0.5926402807235718, |
| "logps/chosen": -1.9593303203582764, |
| "logps/rejected": -2.446382999420166, |
| "loss": 1.161, |
| "rewards/accuracies": 0.640625, |
| "rewards/chosen": -3.9186606407165527, |
| "rewards/margins": 0.9741055965423584, |
| "rewards/rejected": -4.892765998840332, |
| "step": 444 |
| }, |
| { |
| "epoch": 0.9408655434423522, |
| "grad_norm": 2.006077527999878, |
| "learning_rate": 9.972100623333035e-09, |
| "logits/chosen": -0.5911227464675903, |
| "logits/rejected": -0.5988056063652039, |
| "logps/chosen": -1.9767932891845703, |
| "logps/rejected": -2.307847499847412, |
| "loss": 1.2698, |
| "rewards/accuracies": 0.6953125, |
| "rewards/chosen": -3.9535865783691406, |
| "rewards/margins": 0.6621084809303284, |
| "rewards/rejected": -4.615694999694824, |
| "step": 445 |
| }, |
| { |
| "epoch": 0.9429798480343574, |
| "grad_norm": 3.775676965713501, |
| "learning_rate": 9.249349537894968e-09, |
| "logits/chosen": -0.5951496958732605, |
| "logits/rejected": -0.5602840185165405, |
| "logps/chosen": -2.01466965675354, |
| "logps/rejected": -2.404120922088623, |
| "loss": 1.3551, |
| "rewards/accuracies": 0.6328125, |
| "rewards/chosen": -4.02933931350708, |
| "rewards/margins": 0.7789022922515869, |
| "rewards/rejected": -4.808241844177246, |
| "step": 446 |
| }, |
| { |
| "epoch": 0.9450941526263628, |
| "grad_norm": 10.657898902893066, |
| "learning_rate": 8.553540272392967e-09, |
| "logits/chosen": -0.616013765335083, |
| "logits/rejected": -0.6068493127822876, |
| "logps/chosen": -1.9523563385009766, |
| "logps/rejected": -2.3371798992156982, |
| "loss": 1.2264, |
| "rewards/accuracies": 0.640625, |
| "rewards/chosen": -3.904712677001953, |
| "rewards/margins": 0.7696471810340881, |
| "rewards/rejected": -4.6743597984313965, |
| "step": 447 |
| }, |
| { |
| "epoch": 0.947208457218368, |
| "grad_norm": 5.239955902099609, |
| "learning_rate": 7.884711026201584e-09, |
| "logits/chosen": -0.5559091567993164, |
| "logits/rejected": -0.5499454140663147, |
| "logps/chosen": -1.9888339042663574, |
| "logps/rejected": -2.5645201206207275, |
| "loss": 1.1615, |
| "rewards/accuracies": 0.703125, |
| "rewards/chosen": -3.977667808532715, |
| "rewards/margins": 1.1513725519180298, |
| "rewards/rejected": -5.129040241241455, |
| "step": 448 |
| }, |
| { |
| "epoch": 0.9493227618103733, |
| "grad_norm": 4.970836162567139, |
| "learning_rate": 7.242898517513863e-09, |
| "logits/chosen": -0.6270098686218262, |
| "logits/rejected": -0.5990616083145142, |
| "logps/chosen": -2.0393564701080322, |
| "logps/rejected": -2.6450533866882324, |
| "loss": 1.0316, |
| "rewards/accuracies": 0.7421875, |
| "rewards/chosen": -4.0787129402160645, |
| "rewards/margins": 1.2113933563232422, |
| "rewards/rejected": -5.290106773376465, |
| "step": 449 |
| }, |
| { |
| "epoch": 0.9514370664023786, |
| "grad_norm": 4.401031494140625, |
| "learning_rate": 6.62813798132561e-09, |
| "logits/chosen": -0.6103833913803101, |
| "logits/rejected": -0.6355498433113098, |
| "logps/chosen": -1.9900306463241577, |
| "logps/rejected": -2.4799742698669434, |
| "loss": 1.1272, |
| "rewards/accuracies": 0.734375, |
| "rewards/chosen": -3.9800612926483154, |
| "rewards/margins": 0.979887843132019, |
| "rewards/rejected": -4.959948539733887, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.9535513709943839, |
| "grad_norm": 5.162088871002197, |
| "learning_rate": 6.040463167500509e-09, |
| "logits/chosen": -0.6351377367973328, |
| "logits/rejected": -0.6445170044898987, |
| "logps/chosen": -2.017266035079956, |
| "logps/rejected": -2.4103317260742188, |
| "loss": 1.2591, |
| "rewards/accuracies": 0.6953125, |
| "rewards/chosen": -4.034532070159912, |
| "rewards/margins": 0.7861310243606567, |
| "rewards/rejected": -4.8206634521484375, |
| "step": 451 |
| }, |
| { |
| "epoch": 0.9556656755863892, |
| "grad_norm": 3.158773422241211, |
| "learning_rate": 5.4799063389179834e-09, |
| "logits/chosen": -0.6216992139816284, |
| "logits/rejected": -0.6317836046218872, |
| "logps/chosen": -1.9916179180145264, |
| "logps/rejected": -2.476783275604248, |
| "loss": 1.192, |
| "rewards/accuracies": 0.671875, |
| "rewards/chosen": -3.9832358360290527, |
| "rewards/margins": 0.970331072807312, |
| "rewards/rejected": -4.953566551208496, |
| "step": 452 |
| }, |
| { |
| "epoch": 0.9577799801783945, |
| "grad_norm": 4.7540435791015625, |
| "learning_rate": 4.946498269701616e-09, |
| "logits/chosen": -0.652457594871521, |
| "logits/rejected": -0.6148388385772705, |
| "logps/chosen": -2.0300891399383545, |
| "logps/rejected": -2.5610132217407227, |
| "loss": 1.0769, |
| "rewards/accuracies": 0.734375, |
| "rewards/chosen": -4.060178279876709, |
| "rewards/margins": 1.061848759651184, |
| "rewards/rejected": -5.122026443481445, |
| "step": 453 |
| }, |
| { |
| "epoch": 0.9598942847703997, |
| "grad_norm": 4.686556339263916, |
| "learning_rate": 4.440268243529666e-09, |
| "logits/chosen": -0.5588012337684631, |
| "logits/rejected": -0.5526341199874878, |
| "logps/chosen": -1.8666988611221313, |
| "logps/rejected": -2.3390815258026123, |
| "loss": 1.1768, |
| "rewards/accuracies": 0.6796875, |
| "rewards/chosen": -3.7333977222442627, |
| "rewards/margins": 0.9447645545005798, |
| "rewards/rejected": -4.678163051605225, |
| "step": 454 |
| }, |
| { |
| "epoch": 0.9620085893624051, |
| "grad_norm": 2.740269422531128, |
| "learning_rate": 3.961244052027413e-09, |
| "logits/chosen": -0.6438521146774292, |
| "logits/rejected": -0.6682748198509216, |
| "logps/chosen": -2.0076475143432617, |
| "logps/rejected": -2.388810396194458, |
| "loss": 1.2689, |
| "rewards/accuracies": 0.640625, |
| "rewards/chosen": -4.015295028686523, |
| "rewards/margins": 0.7623259425163269, |
| "rewards/rejected": -4.777620792388916, |
| "step": 455 |
| }, |
| { |
| "epoch": 0.9641228939544103, |
| "grad_norm": 2.9197144508361816, |
| "learning_rate": 3.509451993241541e-09, |
| "logits/chosen": -0.5822494029998779, |
| "logits/rejected": -0.5853508114814758, |
| "logps/chosen": -1.8848122358322144, |
| "logps/rejected": -2.4192898273468018, |
| "loss": 1.0924, |
| "rewards/accuracies": 0.71875, |
| "rewards/chosen": -3.7696244716644287, |
| "rewards/margins": 1.0689555406570435, |
| "rewards/rejected": -4.8385796546936035, |
| "step": 456 |
| }, |
| { |
| "epoch": 0.9662371985464155, |
| "grad_norm": 4.501737117767334, |
| "learning_rate": 3.084916870196297e-09, |
| "logits/chosen": -0.5652188658714294, |
| "logits/rejected": -0.5740686655044556, |
| "logps/chosen": -1.9216543436050415, |
| "logps/rejected": -2.23102068901062, |
| "loss": 1.2907, |
| "rewards/accuracies": 0.6328125, |
| "rewards/chosen": -3.843308687210083, |
| "rewards/margins": 0.618732750415802, |
| "rewards/rejected": -4.46204137802124, |
| "step": 457 |
| }, |
| { |
| "epoch": 0.9683515031384209, |
| "grad_norm": 3.512376070022583, |
| "learning_rate": 2.687661989531964e-09, |
| "logits/chosen": -0.6515664458274841, |
| "logits/rejected": -0.6550417542457581, |
| "logps/chosen": -1.9334843158721924, |
| "logps/rejected": -2.2688543796539307, |
| "loss": 1.2578, |
| "rewards/accuracies": 0.578125, |
| "rewards/chosen": -3.8669686317443848, |
| "rewards/margins": 0.67074054479599, |
| "rewards/rejected": -4.537708759307861, |
| "step": 458 |
| }, |
| { |
| "epoch": 0.9704658077304261, |
| "grad_norm": 2.165844678878784, |
| "learning_rate": 2.3177091602251675e-09, |
| "logits/chosen": -0.6218724250793457, |
| "logits/rejected": -0.5920112729072571, |
| "logps/chosen": -1.8584281206130981, |
| "logps/rejected": -2.366225242614746, |
| "loss": 1.1553, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -3.7168562412261963, |
| "rewards/margins": 1.0155941247940063, |
| "rewards/rejected": -4.732450485229492, |
| "step": 459 |
| }, |
| { |
| "epoch": 0.9725801123224315, |
| "grad_norm": 1.7227884531021118, |
| "learning_rate": 1.975078692391552e-09, |
| "logits/chosen": -0.5791985988616943, |
| "logits/rejected": -0.5785022974014282, |
| "logps/chosen": -1.8981022834777832, |
| "logps/rejected": -2.3716633319854736, |
| "loss": 1.1642, |
| "rewards/accuracies": 0.7265625, |
| "rewards/chosen": -3.7962045669555664, |
| "rewards/margins": 0.9471220970153809, |
| "rewards/rejected": -4.743326663970947, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.9746944169144367, |
| "grad_norm": 3.150090217590332, |
| "learning_rate": 1.659789396171063e-09, |
| "logits/chosen": -0.6548072695732117, |
| "logits/rejected": -0.6290433406829834, |
| "logps/chosen": -2.0168204307556152, |
| "logps/rejected": -2.520479679107666, |
| "loss": 1.0736, |
| "rewards/accuracies": 0.765625, |
| "rewards/chosen": -4.0336408615112305, |
| "rewards/margins": 1.0073186159133911, |
| "rewards/rejected": -5.040959358215332, |
| "step": 461 |
| }, |
| { |
| "epoch": 0.976808721506442, |
| "grad_norm": 1.256157636642456, |
| "learning_rate": 1.37185858069494e-09, |
| "logits/chosen": -0.7094852328300476, |
| "logits/rejected": -0.7226460576057434, |
| "logps/chosen": -1.8896048069000244, |
| "logps/rejected": -2.4871973991394043, |
| "loss": 1.0536, |
| "rewards/accuracies": 0.6640625, |
| "rewards/chosen": -3.779209613800049, |
| "rewards/margins": 1.1951854228973389, |
| "rewards/rejected": -4.974394798278809, |
| "step": 462 |
| }, |
| { |
| "epoch": 0.9789230260984473, |
| "grad_norm": 2.8358895778656006, |
| "learning_rate": 1.1113020531357541e-09, |
| "logits/chosen": -0.6778469085693359, |
| "logits/rejected": -0.6957201957702637, |
| "logps/chosen": -2.0275380611419678, |
| "logps/rejected": -2.470618963241577, |
| "loss": 1.1801, |
| "rewards/accuracies": 0.6640625, |
| "rewards/chosen": -4.0550761222839355, |
| "rewards/margins": 0.886161208152771, |
| "rewards/rejected": -4.941237926483154, |
| "step": 463 |
| }, |
| { |
| "epoch": 0.9810373306904526, |
| "grad_norm": 2.8881914615631104, |
| "learning_rate": 8.781341178393242e-10, |
| "logits/chosen": -0.5639821887016296, |
| "logits/rejected": -0.5891467928886414, |
| "logps/chosen": -2.0047199726104736, |
| "logps/rejected": -2.522782802581787, |
| "loss": 1.1948, |
| "rewards/accuracies": 0.640625, |
| "rewards/chosen": -4.009439945220947, |
| "rewards/margins": 1.0361257791519165, |
| "rewards/rejected": -5.045565605163574, |
| "step": 464 |
| }, |
| { |
| "epoch": 0.9831516352824579, |
| "grad_norm": 3.421194314956665, |
| "learning_rate": 6.723675755396229e-10, |
| "logits/chosen": -0.540326714515686, |
| "logits/rejected": -0.5159227252006531, |
| "logps/chosen": -1.88228178024292, |
| "logps/rejected": -2.2003138065338135, |
| "loss": 1.2191, |
| "rewards/accuracies": 0.59375, |
| "rewards/chosen": -3.76456356048584, |
| "rewards/margins": 0.6360642313957214, |
| "rewards/rejected": -4.400627613067627, |
| "step": 465 |
| }, |
| { |
| "epoch": 0.9852659398744632, |
| "grad_norm": 4.243066310882568, |
| "learning_rate": 4.940137226560615e-10, |
| "logits/chosen": -0.6175463795661926, |
| "logits/rejected": -0.6400432586669922, |
| "logps/chosen": -1.9547748565673828, |
| "logps/rejected": -2.4598965644836426, |
| "loss": 1.2589, |
| "rewards/accuracies": 0.640625, |
| "rewards/chosen": -3.9095497131347656, |
| "rewards/margins": 1.0102434158325195, |
| "rewards/rejected": -4.919793128967285, |
| "step": 466 |
| }, |
| { |
| "epoch": 0.9873802444664684, |
| "grad_norm": 3.3425028324127197, |
| "learning_rate": 3.430823506730962e-10, |
| "logits/chosen": -0.5236034393310547, |
| "logits/rejected": -0.48699086904525757, |
| "logps/chosen": -2.167372465133667, |
| "logps/rejected": -2.687620162963867, |
| "loss": 1.2024, |
| "rewards/accuracies": 0.6796875, |
| "rewards/chosen": -4.334744930267334, |
| "rewards/margins": 1.0404952764511108, |
| "rewards/rejected": -5.375240325927734, |
| "step": 467 |
| }, |
| { |
| "epoch": 0.9894945490584738, |
| "grad_norm": 3.1803112030029297, |
| "learning_rate": 2.1958174560282594e-10, |
| "logits/chosen": -0.6515716910362244, |
| "logits/rejected": -0.6526726484298706, |
| "logps/chosen": -2.0350496768951416, |
| "logps/rejected": -2.4857177734375, |
| "loss": 1.1524, |
| "rewards/accuracies": 0.6484375, |
| "rewards/chosen": -4.070099353790283, |
| "rewards/margins": 0.9013361930847168, |
| "rewards/rejected": -4.971435546875, |
| "step": 468 |
| }, |
| { |
| "epoch": 0.991608853650479, |
| "grad_norm": 2.8402769565582275, |
| "learning_rate": 1.2351868753018858e-10, |
| "logits/chosen": -0.5555111765861511, |
| "logits/rejected": -0.5084383487701416, |
| "logps/chosen": -1.9741497039794922, |
| "logps/rejected": -2.5360653400421143, |
| "loss": 1.0956, |
| "rewards/accuracies": 0.703125, |
| "rewards/chosen": -3.9482994079589844, |
| "rewards/margins": 1.1238315105438232, |
| "rewards/rejected": -5.0721306800842285, |
| "step": 469 |
| }, |
| { |
| "epoch": 0.9937231582424844, |
| "grad_norm": 14.110418319702148, |
| "learning_rate": 5.4898450240536964e-11, |
| "logits/chosen": -0.6210866570472717, |
| "logits/rejected": -0.614806056022644, |
| "logps/chosen": -2.0763094425201416, |
| "logps/rejected": -2.5026116371154785, |
| "loss": 1.2184, |
| "rewards/accuracies": 0.7265625, |
| "rewards/chosen": -4.152618885040283, |
| "rewards/margins": 0.8526046276092529, |
| "rewards/rejected": -5.005223274230957, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.9958374628344896, |
| "grad_norm": 2.8393566608428955, |
| "learning_rate": 1.3724800930314805e-11, |
| "logits/chosen": -0.5895847678184509, |
| "logits/rejected": -0.6269129514694214, |
| "logps/chosen": -1.8787530660629272, |
| "logps/rejected": -2.4467647075653076, |
| "loss": 1.0714, |
| "rewards/accuracies": 0.7265625, |
| "rewards/chosen": -3.7575061321258545, |
| "rewards/margins": 1.1360235214233398, |
| "rewards/rejected": -4.893529415130615, |
| "step": 471 |
| }, |
| { |
| "epoch": 0.9979517674264948, |
| "grad_norm": 3.9959075450897217, |
| "learning_rate": 0.0, |
| "logits/chosen": -0.6461910009384155, |
| "logits/rejected": -0.6503991484642029, |
| "logps/chosen": -1.798724889755249, |
| "logps/rejected": -2.3589823246002197, |
| "loss": 1.0133, |
| "rewards/accuracies": 0.7421875, |
| "rewards/chosen": -3.597449779510498, |
| "rewards/margins": 1.1205153465270996, |
| "rewards/rejected": -4.7179646492004395, |
| "step": 472 |
| }, |
| { |
| "epoch": 0.9979517674264948, |
| "step": 472, |
| "total_flos": 0.0, |
| "train_loss": 1.280224425307775, |
| "train_runtime": 38087.5267, |
| "train_samples_per_second": 1.589, |
| "train_steps_per_second": 0.012 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 472, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 64, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|