| { | |
| "best_metric": 0.6825469136238098, | |
| "best_model_checkpoint": "/mnt/data/shesj/Trained/RL4CoT/DPO/Parallel_Iter2_numglueCorrect_iter2_10lang.json/checkpoint-200", | |
| "epoch": 0.050327126321087066, | |
| "eval_steps": 100, | |
| "global_step": 200, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 5e-08, | |
| "logits/chosen": -0.7881901264190674, | |
| "logits/rejected": -0.7754368782043457, | |
| "logps/chosen": -5.556678295135498, | |
| "logps/rejected": -8.082754135131836, | |
| "loss": 0.693, | |
| "rewards/accuracies": 0.3187499940395355, | |
| "rewards/chosen": 0.0005767763941548765, | |
| "rewards/margins": -0.000614482443779707, | |
| "rewards/rejected": 0.0011912587797269225, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 1e-07, | |
| "logits/chosen": -0.7774807214736938, | |
| "logits/rejected": -0.7521709203720093, | |
| "logps/chosen": -6.2856526374816895, | |
| "logps/rejected": -7.786572456359863, | |
| "loss": 0.6935, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": -0.0011454308405518532, | |
| "rewards/margins": 0.002339282538741827, | |
| "rewards/rejected": -0.003484714310616255, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 1.5e-07, | |
| "logits/chosen": -0.7695692777633667, | |
| "logits/rejected": -0.7617800831794739, | |
| "logps/chosen": -5.672076225280762, | |
| "logps/rejected": -7.90362548828125, | |
| "loss": 0.6935, | |
| "rewards/accuracies": 0.4124999940395355, | |
| "rewards/chosen": -0.004858463071286678, | |
| "rewards/margins": -0.00798516534268856, | |
| "rewards/rejected": 0.003126702504232526, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 2e-07, | |
| "logits/chosen": -0.8169188499450684, | |
| "logits/rejected": -0.8234481811523438, | |
| "logps/chosen": -5.951030731201172, | |
| "logps/rejected": -7.665135383605957, | |
| "loss": 0.6924, | |
| "rewards/accuracies": 0.4749999940395355, | |
| "rewards/chosen": -0.0009545508655719459, | |
| "rewards/margins": -0.0022635911591351032, | |
| "rewards/rejected": 0.00130904046818614, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 2.5e-07, | |
| "logits/chosen": -0.7988893389701843, | |
| "logits/rejected": -0.7831005454063416, | |
| "logps/chosen": -4.960128307342529, | |
| "logps/rejected": -7.793705940246582, | |
| "loss": 0.6929, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": 0.0017494624480605125, | |
| "rewards/margins": 0.0019936964381486177, | |
| "rewards/rejected": -0.0002442340482957661, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 3e-07, | |
| "logits/chosen": -0.7896796464920044, | |
| "logits/rejected": -0.7605875730514526, | |
| "logps/chosen": -6.406218528747559, | |
| "logps/rejected": -8.445697784423828, | |
| "loss": 0.6923, | |
| "rewards/accuracies": 0.46875, | |
| "rewards/chosen": -0.0006792292697355151, | |
| "rewards/margins": -0.0016146342968568206, | |
| "rewards/rejected": 0.0009354048524983227, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 3.5e-07, | |
| "logits/chosen": -0.8104821443557739, | |
| "logits/rejected": -0.7983841896057129, | |
| "logps/chosen": -6.952303409576416, | |
| "logps/rejected": -8.65689754486084, | |
| "loss": 0.6926, | |
| "rewards/accuracies": 0.5062500238418579, | |
| "rewards/chosen": 0.0003856793628074229, | |
| "rewards/margins": 0.0037465274799615145, | |
| "rewards/rejected": -0.0033608481753617525, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4e-07, | |
| "logits/chosen": -0.8198621869087219, | |
| "logits/rejected": -0.8019220232963562, | |
| "logps/chosen": -6.161223888397217, | |
| "logps/rejected": -7.956850528717041, | |
| "loss": 0.6927, | |
| "rewards/accuracies": 0.512499988079071, | |
| "rewards/chosen": 0.001837434945628047, | |
| "rewards/margins": 0.005904150195419788, | |
| "rewards/rejected": -0.004066715482622385, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.5e-07, | |
| "logits/chosen": -0.7631333470344543, | |
| "logits/rejected": -0.7561143636703491, | |
| "logps/chosen": -5.855575084686279, | |
| "logps/rejected": -7.01950740814209, | |
| "loss": 0.6913, | |
| "rewards/accuracies": 0.5375000238418579, | |
| "rewards/chosen": 0.0016343919560313225, | |
| "rewards/margins": 0.005311951506882906, | |
| "rewards/rejected": -0.0036775595508515835, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 5e-07, | |
| "logits/chosen": -0.7467092871665955, | |
| "logits/rejected": -0.7552592754364014, | |
| "logps/chosen": -7.219940185546875, | |
| "logps/rejected": -7.984251976013184, | |
| "loss": 0.6907, | |
| "rewards/accuracies": 0.5375000238418579, | |
| "rewards/chosen": 0.0006344284047372639, | |
| "rewards/margins": 0.0040216282941401005, | |
| "rewards/rejected": -0.003387199714779854, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 5.5e-07, | |
| "logits/chosen": -0.8183493614196777, | |
| "logits/rejected": -0.8048542737960815, | |
| "logps/chosen": -5.986401557922363, | |
| "logps/rejected": -7.050605773925781, | |
| "loss": 0.6903, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": 0.0044477893970906734, | |
| "rewards/margins": 0.013396045193076134, | |
| "rewards/rejected": -0.008948257192969322, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 6e-07, | |
| "logits/chosen": -0.7246443033218384, | |
| "logits/rejected": -0.7153327465057373, | |
| "logps/chosen": -6.37067985534668, | |
| "logps/rejected": -7.855441093444824, | |
| "loss": 0.69, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -0.0028912366833537817, | |
| "rewards/margins": 0.0029723027255386114, | |
| "rewards/rejected": -0.005863540340214968, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 6.5e-07, | |
| "logits/chosen": -0.7883706092834473, | |
| "logits/rejected": -0.7892045974731445, | |
| "logps/chosen": -5.0366129875183105, | |
| "logps/rejected": -6.685678005218506, | |
| "loss": 0.689, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": 0.003989654593169689, | |
| "rewards/margins": 0.0065727815963327885, | |
| "rewards/rejected": -0.002583127235993743, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 7e-07, | |
| "logits/chosen": -0.7610381245613098, | |
| "logits/rejected": -0.767534613609314, | |
| "logps/chosen": -6.8763604164123535, | |
| "logps/rejected": -8.272597312927246, | |
| "loss": 0.687, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.0005852003814652562, | |
| "rewards/margins": 0.012984293513000011, | |
| "rewards/rejected": -0.013569491915404797, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 7.5e-07, | |
| "logits/chosen": -0.7938845753669739, | |
| "logits/rejected": -0.7884698510169983, | |
| "logps/chosen": -6.220009803771973, | |
| "logps/rejected": -7.81838321685791, | |
| "loss": 0.685, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": 0.008494245819747448, | |
| "rewards/margins": 0.02036314085125923, | |
| "rewards/rejected": -0.011868895962834358, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 8e-07, | |
| "logits/chosen": -0.760898232460022, | |
| "logits/rejected": -0.7529922127723694, | |
| "logps/chosen": -6.070019245147705, | |
| "logps/rejected": -8.474264144897461, | |
| "loss": 0.6809, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.0008083779248408973, | |
| "rewards/margins": 0.0290432907640934, | |
| "rewards/rejected": -0.029851669445633888, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 8.499999999999999e-07, | |
| "logits/chosen": -0.8255828619003296, | |
| "logits/rejected": -0.8029024004936218, | |
| "logps/chosen": -5.739585876464844, | |
| "logps/rejected": -8.894620895385742, | |
| "loss": 0.681, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": 0.0006468339124694467, | |
| "rewards/margins": 0.039313118904829025, | |
| "rewards/rejected": -0.03866628557443619, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 9e-07, | |
| "logits/chosen": -0.8031052350997925, | |
| "logits/rejected": -0.7612560987472534, | |
| "logps/chosen": -6.660666465759277, | |
| "logps/rejected": -10.91639232635498, | |
| "loss": 0.677, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.008988827466964722, | |
| "rewards/margins": 0.03577885776758194, | |
| "rewards/rejected": -0.04476768523454666, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 9.499999999999999e-07, | |
| "logits/chosen": -0.8087406158447266, | |
| "logits/rejected": -0.7717125415802002, | |
| "logps/chosen": -6.990227699279785, | |
| "logps/rejected": -10.181965827941895, | |
| "loss": 0.6766, | |
| "rewards/accuracies": 0.606249988079071, | |
| "rewards/chosen": -0.017899103462696075, | |
| "rewards/margins": 0.03709184005856514, | |
| "rewards/rejected": -0.054990947246551514, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 1e-06, | |
| "logits/chosen": -0.7920883297920227, | |
| "logits/rejected": -0.7615999579429626, | |
| "logps/chosen": -7.010110378265381, | |
| "logps/rejected": -8.589981079101562, | |
| "loss": 0.6742, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -0.01662164181470871, | |
| "rewards/margins": 0.04322618246078491, | |
| "rewards/rejected": -0.05984782055020332, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "eval_logits/chosen": -1.2141907215118408, | |
| "eval_logits/rejected": -1.2049294710159302, | |
| "eval_logps/chosen": -6.552766799926758, | |
| "eval_logps/rejected": -8.47075366973877, | |
| "eval_loss": 0.6869122385978699, | |
| "eval_rewards/accuracies": 0.5723472833633423, | |
| "eval_rewards/chosen": -0.021150289103388786, | |
| "eval_rewards/margins": 0.02127229794859886, | |
| "eval_rewards/rejected": -0.0424225889146328, | |
| "eval_runtime": 628.2123, | |
| "eval_samples_per_second": 31.588, | |
| "eval_steps_per_second": 0.495, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 9.999829128320873e-07, | |
| "logits/chosen": -0.7386836409568787, | |
| "logits/rejected": -0.7065194845199585, | |
| "logps/chosen": -7.015887260437012, | |
| "logps/rejected": -8.969260215759277, | |
| "loss": 0.6691, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -0.02153836190700531, | |
| "rewards/margins": 0.05296989530324936, | |
| "rewards/rejected": -0.07450826466083527, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 9.999316524962345e-07, | |
| "logits/chosen": -0.8299457430839539, | |
| "logits/rejected": -0.8254146575927734, | |
| "logps/chosen": -6.386677265167236, | |
| "logps/rejected": -8.159158706665039, | |
| "loss": 0.6626, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.019821835681796074, | |
| "rewards/margins": 0.08776978403329849, | |
| "rewards/rejected": -0.10759161412715912, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 9.998462224960173e-07, | |
| "logits/chosen": -0.7512461543083191, | |
| "logits/rejected": -0.7053896188735962, | |
| "logps/chosen": -7.265576362609863, | |
| "logps/rejected": -10.415300369262695, | |
| "loss": 0.6565, | |
| "rewards/accuracies": 0.668749988079071, | |
| "rewards/chosen": -0.04903126507997513, | |
| "rewards/margins": 0.10108338296413422, | |
| "rewards/rejected": -0.15011465549468994, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 9.99726628670463e-07, | |
| "logits/chosen": -0.8122448921203613, | |
| "logits/rejected": -0.7945531010627747, | |
| "logps/chosen": -6.279524326324463, | |
| "logps/rejected": -8.167196273803711, | |
| "loss": 0.6583, | |
| "rewards/accuracies": 0.643750011920929, | |
| "rewards/chosen": -0.04174378514289856, | |
| "rewards/margins": 0.07122843712568283, | |
| "rewards/rejected": -0.11297222226858139, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 9.995728791936505e-07, | |
| "logits/chosen": -0.7480685114860535, | |
| "logits/rejected": -0.7061656713485718, | |
| "logps/chosen": -6.890868186950684, | |
| "logps/rejected": -10.21199893951416, | |
| "loss": 0.651, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.07250909507274628, | |
| "rewards/margins": 0.08439986407756805, | |
| "rewards/rejected": -0.15690895915031433, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 9.993849845741523e-07, | |
| "logits/chosen": -0.7156326174736023, | |
| "logits/rejected": -0.7210611701011658, | |
| "logps/chosen": -7.967876434326172, | |
| "logps/rejected": -11.226155281066895, | |
| "loss": 0.6563, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.0788055881857872, | |
| "rewards/margins": 0.14646434783935547, | |
| "rewards/rejected": -0.22526994347572327, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 9.991629576543163e-07, | |
| "logits/chosen": -0.8028038740158081, | |
| "logits/rejected": -0.7880641222000122, | |
| "logps/chosen": -7.9293036460876465, | |
| "logps/rejected": -12.446617126464844, | |
| "loss": 0.6393, | |
| "rewards/accuracies": 0.6937500238418579, | |
| "rewards/chosen": -0.07287696748971939, | |
| "rewards/margins": 0.16767558455467224, | |
| "rewards/rejected": -0.24055257439613342, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 9.989068136093872e-07, | |
| "logits/chosen": -0.6804400682449341, | |
| "logits/rejected": -0.6678518056869507, | |
| "logps/chosen": -7.790997505187988, | |
| "logps/rejected": -10.521781921386719, | |
| "loss": 0.6429, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.10002864897251129, | |
| "rewards/margins": 0.16521799564361572, | |
| "rewards/rejected": -0.2652466297149658, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 9.986165699464705e-07, | |
| "logits/chosen": -0.7420132160186768, | |
| "logits/rejected": -0.7359737157821655, | |
| "logps/chosen": -7.692935943603516, | |
| "logps/rejected": -11.80825138092041, | |
| "loss": 0.6275, | |
| "rewards/accuracies": 0.71875, | |
| "rewards/chosen": -0.12237729877233505, | |
| "rewards/margins": 0.2165246307849884, | |
| "rewards/rejected": -0.33890193700790405, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 9.982922465033348e-07, | |
| "logits/chosen": -0.6650699377059937, | |
| "logits/rejected": -0.6643859148025513, | |
| "logps/chosen": -8.27735710144043, | |
| "logps/rejected": -11.08592700958252, | |
| "loss": 0.6316, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -0.18646416068077087, | |
| "rewards/margins": 0.17933328449726105, | |
| "rewards/rejected": -0.3657974600791931, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 9.979338654470567e-07, | |
| "logits/chosen": -0.6874249577522278, | |
| "logits/rejected": -0.6583540439605713, | |
| "logps/chosen": -8.387764930725098, | |
| "logps/rejected": -10.597826957702637, | |
| "loss": 0.6355, | |
| "rewards/accuracies": 0.643750011920929, | |
| "rewards/chosen": -0.15453846752643585, | |
| "rewards/margins": 0.15605905652046204, | |
| "rewards/rejected": -0.3105975389480591, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 9.975414512725056e-07, | |
| "logits/chosen": -0.6604259610176086, | |
| "logits/rejected": -0.654133677482605, | |
| "logps/chosen": -8.139281272888184, | |
| "logps/rejected": -11.677125930786133, | |
| "loss": 0.6281, | |
| "rewards/accuracies": 0.668749988079071, | |
| "rewards/chosen": -0.1922483593225479, | |
| "rewards/margins": 0.17367199063301086, | |
| "rewards/rejected": -0.36592036485671997, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 9.971150308006687e-07, | |
| "logits/chosen": -0.6868435144424438, | |
| "logits/rejected": -0.6831103563308716, | |
| "logps/chosen": -7.650822639465332, | |
| "logps/rejected": -13.520294189453125, | |
| "loss": 0.6184, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.1557883322238922, | |
| "rewards/margins": 0.309310644865036, | |
| "rewards/rejected": -0.4650990068912506, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 9.966546331768192e-07, | |
| "logits/chosen": -0.6930921673774719, | |
| "logits/rejected": -0.656936764717102, | |
| "logps/chosen": -7.236788749694824, | |
| "logps/rejected": -12.021242141723633, | |
| "loss": 0.617, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.16039922833442688, | |
| "rewards/margins": 0.18796458840370178, | |
| "rewards/rejected": -0.34836381673812866, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 9.961602898685223e-07, | |
| "logits/chosen": -0.6678417921066284, | |
| "logits/rejected": -0.6494520306587219, | |
| "logps/chosen": -8.197237014770508, | |
| "logps/rejected": -13.004777908325195, | |
| "loss": 0.6192, | |
| "rewards/accuracies": 0.7437499761581421, | |
| "rewards/chosen": -0.16785219311714172, | |
| "rewards/margins": 0.2895987629890442, | |
| "rewards/rejected": -0.4574509561061859, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 9.956320346634875e-07, | |
| "logits/chosen": -0.6635026931762695, | |
| "logits/rejected": -0.6535638570785522, | |
| "logps/chosen": -8.445914268493652, | |
| "logps/rejected": -14.642396926879883, | |
| "loss": 0.6054, | |
| "rewards/accuracies": 0.768750011920929, | |
| "rewards/chosen": -0.2178197205066681, | |
| "rewards/margins": 0.33262819051742554, | |
| "rewards/rejected": -0.5504478812217712, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 9.95069903667256e-07, | |
| "logits/chosen": -0.6291212439537048, | |
| "logits/rejected": -0.5968618392944336, | |
| "logps/chosen": -8.441099166870117, | |
| "logps/rejected": -13.59777545928955, | |
| "loss": 0.6019, | |
| "rewards/accuracies": 0.668749988079071, | |
| "rewards/chosen": -0.262218177318573, | |
| "rewards/margins": 0.2801818251609802, | |
| "rewards/rejected": -0.5424000024795532, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 9.944739353007341e-07, | |
| "logits/chosen": -0.6783192753791809, | |
| "logits/rejected": -0.6327847242355347, | |
| "logps/chosen": -8.718297004699707, | |
| "logps/rejected": -15.599523544311523, | |
| "loss": 0.5953, | |
| "rewards/accuracies": 0.731249988079071, | |
| "rewards/chosen": -0.2528052031993866, | |
| "rewards/margins": 0.31637701392173767, | |
| "rewards/rejected": -0.569182276725769, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 9.938441702975689e-07, | |
| "logits/chosen": -0.6378843784332275, | |
| "logits/rejected": -0.6440542936325073, | |
| "logps/chosen": -9.802359580993652, | |
| "logps/rejected": -14.98701286315918, | |
| "loss": 0.5907, | |
| "rewards/accuracies": 0.6812499761581421, | |
| "rewards/chosen": -0.3599366247653961, | |
| "rewards/margins": 0.2988061010837555, | |
| "rewards/rejected": -0.6587426066398621, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 9.931806517013612e-07, | |
| "logits/chosen": -0.6049096584320068, | |
| "logits/rejected": -0.6118007302284241, | |
| "logps/chosen": -7.990042686462402, | |
| "logps/rejected": -13.457636833190918, | |
| "loss": 0.5959, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.28124743700027466, | |
| "rewards/margins": 0.39520224928855896, | |
| "rewards/rejected": -0.6764496564865112, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "eval_logits/chosen": -1.100651502609253, | |
| "eval_logits/rejected": -1.090796947479248, | |
| "eval_logps/chosen": -9.126388549804688, | |
| "eval_logps/rejected": -11.862701416015625, | |
| "eval_loss": 0.6825469136238098, | |
| "eval_rewards/accuracies": 0.5799839496612549, | |
| "eval_rewards/chosen": -0.27851250767707825, | |
| "eval_rewards/margins": 0.10310473293066025, | |
| "eval_rewards/rejected": -0.3816172480583191, | |
| "eval_runtime": 646.4588, | |
| "eval_samples_per_second": 30.696, | |
| "eval_steps_per_second": 0.481, | |
| "step": 200 | |
| } | |
| ], | |
| "logging_steps": 5, | |
| "max_steps": 2000, | |
| "num_train_epochs": 1, | |
| "save_steps": 100, | |
| "total_flos": 0.0, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |