| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 7.272, | |
| "eval_steps": 100, | |
| "global_step": 56, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.6153846153846154, | |
| "grad_norm": 127.33006286621094, | |
| "learning_rate": 5e-07, | |
| "logits/chosen": 0.19238418340682983, | |
| "logits/rejected": 0.21956193447113037, | |
| "logps/chosen": -58.537498474121094, | |
| "logps/rejected": -66.73164367675781, | |
| "loss": 0.6931, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 1.2307692307692308, | |
| "grad_norm": 136.6772918701172, | |
| "learning_rate": 4.752422169756047e-07, | |
| "logits/chosen": 0.22914010286331177, | |
| "logits/rejected": 0.28378042578697205, | |
| "logps/chosen": -76.12860870361328, | |
| "logps/rejected": -70.64468383789062, | |
| "loss": 0.6931, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 1.8461538461538463, | |
| "grad_norm": 143.6078338623047, | |
| "learning_rate": 4.058724504646834e-07, | |
| "logits/chosen": 0.24467766284942627, | |
| "logits/rejected": 0.21774037182331085, | |
| "logps/chosen": -55.543739318847656, | |
| "logps/rejected": -77.82548522949219, | |
| "loss": 0.7227, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": 0.029082417488098145, | |
| "rewards/margins": -0.0368955135345459, | |
| "rewards/rejected": 0.06597793102264404, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 2.4615384615384617, | |
| "grad_norm": 131.76895141601562, | |
| "learning_rate": 3.056302334890786e-07, | |
| "logits/chosen": 0.261148065328598, | |
| "logits/rejected": 0.2774331569671631, | |
| "logps/chosen": -66.54498291015625, | |
| "logps/rejected": -66.6764907836914, | |
| "loss": 0.6577, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": 0.008320659399032593, | |
| "rewards/margins": 0.04801854491233826, | |
| "rewards/rejected": -0.039697885513305664, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 3.076923076923077, | |
| "grad_norm": 126.15006256103516, | |
| "learning_rate": 1.9436976651092142e-07, | |
| "logits/chosen": 0.36693644523620605, | |
| "logits/rejected": 0.3742007613182068, | |
| "logps/chosen": -95.5995864868164, | |
| "logps/rejected": -113.8344955444336, | |
| "loss": 0.597, | |
| "rewards/accuracies": 0.8125, | |
| "rewards/chosen": 0.10893827676773071, | |
| "rewards/margins": 0.25116902589797974, | |
| "rewards/rejected": -0.14223074913024902, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 3.6923076923076925, | |
| "grad_norm": 100.18375396728516, | |
| "learning_rate": 9.412754953531663e-08, | |
| "logits/chosen": 0.24576663970947266, | |
| "logits/rejected": 0.25556063652038574, | |
| "logps/chosen": -80.51493072509766, | |
| "logps/rejected": -84.49187469482422, | |
| "loss": 0.5787, | |
| "rewards/accuracies": 0.9375, | |
| "rewards/chosen": 0.17812098562717438, | |
| "rewards/margins": 0.3286281228065491, | |
| "rewards/rejected": -0.1505071520805359, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 4.3076923076923075, | |
| "grad_norm": 119.61666870117188, | |
| "learning_rate": 2.475778302439524e-08, | |
| "logits/chosen": 0.21196235716342926, | |
| "logits/rejected": 0.2644736170768738, | |
| "logps/chosen": -69.17505645751953, | |
| "logps/rejected": -66.96580505371094, | |
| "loss": 0.563, | |
| "rewards/accuracies": 0.9375, | |
| "rewards/chosen": 0.16142475605010986, | |
| "rewards/margins": 0.3076605200767517, | |
| "rewards/rejected": -0.14623576402664185, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 4.923076923076923, | |
| "grad_norm": 96.98345184326172, | |
| "learning_rate": 0.0, | |
| "logits/chosen": 0.318248987197876, | |
| "logits/rejected": 0.2900750935077667, | |
| "logps/chosen": -55.142822265625, | |
| "logps/rejected": -67.91665649414062, | |
| "loss": 0.5369, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": 0.07582578808069229, | |
| "rewards/margins": 0.24877893924713135, | |
| "rewards/rejected": -0.17295315861701965, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 1.256, | |
| "grad_norm": 84.79383087158203, | |
| "learning_rate": 4.955718126821722e-07, | |
| "logits/chosen": 0.29836222529411316, | |
| "logits/rejected": 0.32582682371139526, | |
| "logps/chosen": -83.86153411865234, | |
| "logps/rejected": -77.13251495361328, | |
| "loss": 0.6714, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": 0.023729726672172546, | |
| "rewards/margins": 0.05816943943500519, | |
| "rewards/rejected": -0.03443971276283264, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 1.384, | |
| "grad_norm": 88.73702239990234, | |
| "learning_rate": 4.921457902821578e-07, | |
| "logits/chosen": 0.21312400698661804, | |
| "logits/rejected": 0.23579223453998566, | |
| "logps/chosen": -68.02887725830078, | |
| "logps/rejected": -76.00659942626953, | |
| "loss": 0.6785, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": 0.005893569439649582, | |
| "rewards/margins": 0.031381912529468536, | |
| "rewards/rejected": -0.025488346815109253, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 1.512, | |
| "grad_norm": 96.98503875732422, | |
| "learning_rate": 4.877641290737883e-07, | |
| "logits/chosen": 0.2715354561805725, | |
| "logits/rejected": 0.27474918961524963, | |
| "logps/chosen": -66.76045227050781, | |
| "logps/rejected": -78.57473754882812, | |
| "loss": 0.6883, | |
| "rewards/accuracies": 0.4375, | |
| "rewards/chosen": -0.02488572895526886, | |
| "rewards/margins": 0.004496380686759949, | |
| "rewards/rejected": -0.02938210964202881, | |
| "step": 11 | |
| }, | |
| { | |
| "epoch": 1.6400000000000001, | |
| "grad_norm": 82.92364501953125, | |
| "learning_rate": 4.824441214720628e-07, | |
| "logits/chosen": 0.2345450520515442, | |
| "logits/rejected": 0.2685388922691345, | |
| "logps/chosen": -71.50077056884766, | |
| "logps/rejected": -66.5575942993164, | |
| "loss": 0.6724, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": 0.06529319286346436, | |
| "rewards/margins": 0.09461906552314758, | |
| "rewards/rejected": -0.029325872659683228, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 1.768, | |
| "grad_norm": 90.68313598632812, | |
| "learning_rate": 4.762067631165049e-07, | |
| "logits/chosen": 0.3173472583293915, | |
| "logits/rejected": 0.31548872590065, | |
| "logps/chosen": -62.33905792236328, | |
| "logps/rejected": -69.90167236328125, | |
| "loss": 0.6621, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": 0.0690992921590805, | |
| "rewards/margins": 0.08333452045917511, | |
| "rewards/rejected": -0.014235228300094604, | |
| "step": 13 | |
| }, | |
| { | |
| "epoch": 1.896, | |
| "grad_norm": 91.8805923461914, | |
| "learning_rate": 4.6907667001096585e-07, | |
| "logits/chosen": 0.24450257420539856, | |
| "logits/rejected": 0.27835142612457275, | |
| "logps/chosen": -75.39544677734375, | |
| "logps/rejected": -92.54512786865234, | |
| "loss": 0.6806, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": 0.11982224881649017, | |
| "rewards/margins": 0.14259418845176697, | |
| "rewards/rejected": -0.022771939635276794, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 2.024, | |
| "grad_norm": 93.57877349853516, | |
| "learning_rate": 4.6108198137550377e-07, | |
| "logits/chosen": 0.2691981792449951, | |
| "logits/rejected": 0.29418689012527466, | |
| "logps/chosen": -65.36813354492188, | |
| "logps/rejected": -86.02149963378906, | |
| "loss": 0.6694, | |
| "rewards/accuracies": 0.65625, | |
| "rewards/chosen": -0.0014134570956230164, | |
| "rewards/margins": 0.08445831388235092, | |
| "rewards/rejected": -0.08587177097797394, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 2.152, | |
| "grad_norm": 90.0985336303711, | |
| "learning_rate": 4.5225424859373684e-07, | |
| "logits/chosen": 0.29637423157691956, | |
| "logits/rejected": 0.3497394323348999, | |
| "logps/chosen": -78.20895385742188, | |
| "logps/rejected": -65.3874282836914, | |
| "loss": 0.6953, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": 0.05982813239097595, | |
| "rewards/margins": 0.08022981882095337, | |
| "rewards/rejected": -0.020401686429977417, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 2.2800000000000002, | |
| "grad_norm": 91.4386978149414, | |
| "learning_rate": 4.426283106939473e-07, | |
| "logits/chosen": 0.3221435546875, | |
| "logits/rejected": 0.34331709146499634, | |
| "logps/chosen": -73.48678588867188, | |
| "logps/rejected": -81.2340087890625, | |
| "loss": 0.6876, | |
| "rewards/accuracies": 0.59375, | |
| "rewards/chosen": 0.06735238432884216, | |
| "rewards/margins": 0.05368679761886597, | |
| "rewards/rejected": 0.013665586709976196, | |
| "step": 17 | |
| }, | |
| { | |
| "epoch": 2.408, | |
| "grad_norm": 98.76863098144531, | |
| "learning_rate": 4.3224215685535287e-07, | |
| "logits/chosen": 0.2613396942615509, | |
| "logits/rejected": 0.2849021553993225, | |
| "logps/chosen": -89.08374786376953, | |
| "logps/rejected": -69.78533172607422, | |
| "loss": 0.6688, | |
| "rewards/accuracies": 0.40625, | |
| "rewards/chosen": -0.02113175392150879, | |
| "rewards/margins": 0.001418381929397583, | |
| "rewards/rejected": -0.022550135850906372, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 2.536, | |
| "grad_norm": 81.9192123413086, | |
| "learning_rate": 4.2113677648217216e-07, | |
| "logits/chosen": 0.22890335321426392, | |
| "logits/rejected": 0.23874913156032562, | |
| "logps/chosen": -68.76072692871094, | |
| "logps/rejected": -66.74049377441406, | |
| "loss": 0.6763, | |
| "rewards/accuracies": 0.59375, | |
| "rewards/chosen": 0.09503498673439026, | |
| "rewards/margins": 0.086346834897995, | |
| "rewards/rejected": 0.008688151836395264, | |
| "step": 19 | |
| }, | |
| { | |
| "epoch": 2.664, | |
| "grad_norm": 95.22876739501953, | |
| "learning_rate": 4.0935599743717244e-07, | |
| "logits/chosen": 0.32450735569000244, | |
| "logits/rejected": 0.33199459314346313, | |
| "logps/chosen": -91.43396759033203, | |
| "logps/rejected": -83.63076782226562, | |
| "loss": 0.6491, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": 0.05055028200149536, | |
| "rewards/margins": 0.13369867205619812, | |
| "rewards/rejected": -0.08314839005470276, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 2.792, | |
| "grad_norm": 92.8243408203125, | |
| "learning_rate": 3.9694631307311825e-07, | |
| "logits/chosen": 0.3116016983985901, | |
| "logits/rejected": 0.3494156002998352, | |
| "logps/chosen": -67.47573852539062, | |
| "logps/rejected": -71.52774047851562, | |
| "loss": 0.6736, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": 0.03751923143863678, | |
| "rewards/margins": 0.048606112599372864, | |
| "rewards/rejected": -0.011086881160736084, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 2.92, | |
| "grad_norm": 89.23714447021484, | |
| "learning_rate": 3.839566987447491e-07, | |
| "logits/chosen": 0.22846412658691406, | |
| "logits/rejected": 0.21796303987503052, | |
| "logps/chosen": -65.1306381225586, | |
| "logps/rejected": -71.10429382324219, | |
| "loss": 0.6992, | |
| "rewards/accuracies": 0.53125, | |
| "rewards/chosen": -0.009614139795303345, | |
| "rewards/margins": -0.01433388888835907, | |
| "rewards/rejected": 0.004719749093055725, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 3.048, | |
| "grad_norm": 87.72737884521484, | |
| "learning_rate": 3.704384185254288e-07, | |
| "logits/chosen": 0.2647473216056824, | |
| "logits/rejected": 0.2934381663799286, | |
| "logps/chosen": -63.67654037475586, | |
| "logps/rejected": -62.632781982421875, | |
| "loss": 0.6676, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": 0.013809099793434143, | |
| "rewards/margins": 0.06607498228549957, | |
| "rewards/rejected": -0.05226588249206543, | |
| "step": 23 | |
| }, | |
| { | |
| "epoch": 3.176, | |
| "grad_norm": 90.60627746582031, | |
| "learning_rate": 3.5644482289126813e-07, | |
| "logits/chosen": 0.32062453031539917, | |
| "logits/rejected": 0.2993485927581787, | |
| "logps/chosen": -68.11253356933594, | |
| "logps/rejected": -99.29121398925781, | |
| "loss": 0.6716, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": 0.008185192942619324, | |
| "rewards/margins": 0.056953445076942444, | |
| "rewards/rejected": -0.04876825213432312, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 3.304, | |
| "grad_norm": 94.25776672363281, | |
| "learning_rate": 3.4203113817116953e-07, | |
| "logits/chosen": 0.2894556522369385, | |
| "logits/rejected": 0.29452645778656006, | |
| "logps/chosen": -64.85166931152344, | |
| "logps/rejected": -63.267059326171875, | |
| "loss": 0.6803, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": 0.01005951315164566, | |
| "rewards/margins": 0.05620530992746353, | |
| "rewards/rejected": -0.04614579677581787, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 3.432, | |
| "grad_norm": 95.0955810546875, | |
| "learning_rate": 3.272542485937368e-07, | |
| "logits/chosen": 0.2319055050611496, | |
| "logits/rejected": 0.287945032119751, | |
| "logps/chosen": -72.57554626464844, | |
| "logps/rejected": -66.64920806884766, | |
| "loss": 0.6887, | |
| "rewards/accuracies": 0.53125, | |
| "rewards/chosen": 0.027968034148216248, | |
| "rewards/margins": 0.0742889791727066, | |
| "rewards/rejected": -0.046320945024490356, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 3.56, | |
| "grad_norm": 88.33543395996094, | |
| "learning_rate": 3.121724717912138e-07, | |
| "logits/chosen": 0.309038907289505, | |
| "logits/rejected": 0.321429967880249, | |
| "logps/chosen": -97.70095825195312, | |
| "logps/rejected": -82.71894073486328, | |
| "loss": 0.659, | |
| "rewards/accuracies": 0.53125, | |
| "rewards/chosen": 0.08497677743434906, | |
| "rewards/margins": 0.1083778589963913, | |
| "rewards/rejected": -0.023401081562042236, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 3.6879999999999997, | |
| "grad_norm": 93.63185119628906, | |
| "learning_rate": 2.968453286464312e-07, | |
| "logits/chosen": 0.2761862277984619, | |
| "logits/rejected": 0.27546417713165283, | |
| "logps/chosen": -75.79278564453125, | |
| "logps/rejected": -75.79965209960938, | |
| "loss": 0.6746, | |
| "rewards/accuracies": 0.59375, | |
| "rewards/chosen": 0.01948818564414978, | |
| "rewards/margins": 0.04068872332572937, | |
| "rewards/rejected": -0.02120053768157959, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 3.816, | |
| "grad_norm": 90.8388671875, | |
| "learning_rate": 2.8133330839107604e-07, | |
| "logits/chosen": 0.280830055475235, | |
| "logits/rejected": 0.2866876423358917, | |
| "logps/chosen": -66.83413696289062, | |
| "logps/rejected": -67.01375579833984, | |
| "loss": 0.6725, | |
| "rewards/accuracies": 0.46875, | |
| "rewards/chosen": 0.013940572738647461, | |
| "rewards/margins": 0.014010876417160034, | |
| "rewards/rejected": -7.030367851257324e-05, | |
| "step": 29 | |
| }, | |
| { | |
| "epoch": 3.944, | |
| "grad_norm": 92.31433868408203, | |
| "learning_rate": 2.6569762988232837e-07, | |
| "logits/chosen": 0.2993810474872589, | |
| "logits/rejected": 0.29364442825317383, | |
| "logps/chosen": -63.468109130859375, | |
| "logps/rejected": -77.49847412109375, | |
| "loss": 0.6981, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": 0.010666653513908386, | |
| "rewards/margins": 0.03114195168018341, | |
| "rewards/rejected": -0.020475298166275024, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 4.072, | |
| "grad_norm": 85.3602294921875, | |
| "learning_rate": 2.5e-07, | |
| "logits/chosen": 0.26220929622650146, | |
| "logits/rejected": 0.23664042353630066, | |
| "logps/chosen": -69.07573699951172, | |
| "logps/rejected": -72.7073974609375, | |
| "loss": 0.6799, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": 0.0597347617149353, | |
| "rewards/margins": 0.12319907546043396, | |
| "rewards/rejected": -0.06346431374549866, | |
| "step": 31 | |
| }, | |
| { | |
| "epoch": 4.2, | |
| "grad_norm": 89.2237548828125, | |
| "learning_rate": 2.3430237011767164e-07, | |
| "logits/chosen": 0.2281663417816162, | |
| "logits/rejected": 0.24119029939174652, | |
| "logps/chosen": -75.16613006591797, | |
| "logps/rejected": -64.49757385253906, | |
| "loss": 0.6633, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": 0.040758922696113586, | |
| "rewards/margins": 0.05312100052833557, | |
| "rewards/rejected": -0.012362077832221985, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 4.328, | |
| "grad_norm": 95.59449768066406, | |
| "learning_rate": 2.1866669160892389e-07, | |
| "logits/chosen": 0.3119271993637085, | |
| "logits/rejected": 0.30429312586784363, | |
| "logps/chosen": -67.04680633544922, | |
| "logps/rejected": -76.78421020507812, | |
| "loss": 0.674, | |
| "rewards/accuracies": 0.65625, | |
| "rewards/chosen": 0.04746510088443756, | |
| "rewards/margins": 0.08129900693893433, | |
| "rewards/rejected": -0.033833906054496765, | |
| "step": 33 | |
| }, | |
| { | |
| "epoch": 4.456, | |
| "grad_norm": 103.24285888671875, | |
| "learning_rate": 2.0315467135356878e-07, | |
| "logits/chosen": 0.28600916266441345, | |
| "logits/rejected": 0.30370771884918213, | |
| "logps/chosen": -92.84146118164062, | |
| "logps/rejected": -109.2697982788086, | |
| "loss": 0.694, | |
| "rewards/accuracies": 0.46875, | |
| "rewards/chosen": -0.03957655280828476, | |
| "rewards/margins": -0.023300133645534515, | |
| "rewards/rejected": -0.016276419162750244, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 4.584, | |
| "grad_norm": 92.40351104736328, | |
| "learning_rate": 1.8782752820878633e-07, | |
| "logits/chosen": 0.25603896379470825, | |
| "logits/rejected": 0.2662765681743622, | |
| "logps/chosen": -72.62451171875, | |
| "logps/rejected": -60.2940559387207, | |
| "loss": 0.6891, | |
| "rewards/accuracies": 0.53125, | |
| "rewards/chosen": 0.02680887281894684, | |
| "rewards/margins": 0.04961217939853668, | |
| "rewards/rejected": -0.022803306579589844, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 4.712, | |
| "grad_norm": 84.56185913085938, | |
| "learning_rate": 1.7274575140626315e-07, | |
| "logits/chosen": 0.3107318878173828, | |
| "logits/rejected": 0.33106040954589844, | |
| "logps/chosen": -86.22938537597656, | |
| "logps/rejected": -76.20439910888672, | |
| "loss": 0.6649, | |
| "rewards/accuracies": 0.46875, | |
| "rewards/chosen": 0.027996808290481567, | |
| "rewards/margins": 0.06501305848360062, | |
| "rewards/rejected": -0.03701625019311905, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 4.84, | |
| "grad_norm": 87.72008514404297, | |
| "learning_rate": 1.579688618288305e-07, | |
| "logits/chosen": 0.3074452877044678, | |
| "logits/rejected": 0.31057560443878174, | |
| "logps/chosen": -77.69036865234375, | |
| "logps/rejected": -68.51107025146484, | |
| "loss": 0.6563, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": 0.0846804529428482, | |
| "rewards/margins": 0.1669072061777115, | |
| "rewards/rejected": -0.08222675323486328, | |
| "step": 37 | |
| }, | |
| { | |
| "epoch": 4.968, | |
| "grad_norm": 78.48970031738281, | |
| "learning_rate": 1.4355517710873182e-07, | |
| "logits/chosen": 0.27899622917175293, | |
| "logits/rejected": 0.2892475724220276, | |
| "logps/chosen": -68.76217651367188, | |
| "logps/rejected": -72.50349426269531, | |
| "loss": 0.6612, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": 0.03485181927680969, | |
| "rewards/margins": 0.05903954803943634, | |
| "rewards/rejected": -0.024187728762626648, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 5.096, | |
| "grad_norm": 102.67794036865234, | |
| "learning_rate": 1.2956158147457114e-07, | |
| "logits/chosen": 0.3237246870994568, | |
| "logits/rejected": 0.34282439947128296, | |
| "logps/chosen": -80.13423156738281, | |
| "logps/rejected": -74.20858764648438, | |
| "loss": 0.6689, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": 0.04920327663421631, | |
| "rewards/margins": 0.1548747569322586, | |
| "rewards/rejected": -0.1056714802980423, | |
| "step": 39 | |
| }, | |
| { | |
| "epoch": 5.224, | |
| "grad_norm": 90.3107681274414, | |
| "learning_rate": 1.1604330125525078e-07, | |
| "logits/chosen": 0.29913192987442017, | |
| "logits/rejected": 0.2973610460758209, | |
| "logps/chosen": -81.41338348388672, | |
| "logps/rejected": -78.10675048828125, | |
| "loss": 0.6817, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": 0.08691957592964172, | |
| "rewards/margins": 0.09112322330474854, | |
| "rewards/rejected": -0.0042036473751068115, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 5.352, | |
| "grad_norm": 86.05548858642578, | |
| "learning_rate": 1.0305368692688174e-07, | |
| "logits/chosen": 0.26672640442848206, | |
| "logits/rejected": 0.2698957026004791, | |
| "logps/chosen": -82.20582580566406, | |
| "logps/rejected": -72.3929443359375, | |
| "loss": 0.6793, | |
| "rewards/accuracies": 0.59375, | |
| "rewards/chosen": -0.004676908254623413, | |
| "rewards/margins": 0.028314650058746338, | |
| "rewards/rejected": -0.03299155831336975, | |
| "step": 41 | |
| }, | |
| { | |
| "epoch": 5.48, | |
| "grad_norm": 86.79624938964844, | |
| "learning_rate": 9.064400256282755e-08, | |
| "logits/chosen": 0.3021017014980316, | |
| "logits/rejected": 0.29037410020828247, | |
| "logps/chosen": -60.563438415527344, | |
| "logps/rejected": -72.60798645019531, | |
| "loss": 0.6688, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": 0.015075430274009705, | |
| "rewards/margins": 0.07287518680095673, | |
| "rewards/rejected": -0.05779975652694702, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 5.608, | |
| "grad_norm": 92.15886688232422, | |
| "learning_rate": 7.886322351782782e-08, | |
| "logits/chosen": 0.26732951402664185, | |
| "logits/rejected": 0.30227866768836975, | |
| "logps/chosen": -73.15177917480469, | |
| "logps/rejected": -78.50798797607422, | |
| "loss": 0.6841, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": 0.02876923978328705, | |
| "rewards/margins": 0.06183256208896637, | |
| "rewards/rejected": -0.03306332230567932, | |
| "step": 43 | |
| }, | |
| { | |
| "epoch": 5.736, | |
| "grad_norm": 89.39374542236328, | |
| "learning_rate": 6.775784314464716e-08, | |
| "logits/chosen": 0.25305798649787903, | |
| "logits/rejected": 0.2594181001186371, | |
| "logps/chosen": -86.95756530761719, | |
| "logps/rejected": -77.09736633300781, | |
| "loss": 0.6691, | |
| "rewards/accuracies": 0.53125, | |
| "rewards/chosen": 0.0014192461967468262, | |
| "rewards/margins": 0.11444368958473206, | |
| "rewards/rejected": -0.11302444338798523, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 5.864, | |
| "grad_norm": 87.49380493164062, | |
| "learning_rate": 5.737168930605271e-08, | |
| "logits/chosen": 0.3325170874595642, | |
| "logits/rejected": 0.32772064208984375, | |
| "logps/chosen": -72.07937622070312, | |
| "logps/rejected": -83.23653411865234, | |
| "loss": 0.6907, | |
| "rewards/accuracies": 0.71875, | |
| "rewards/chosen": 0.033388733863830566, | |
| "rewards/margins": 0.23375508189201355, | |
| "rewards/rejected": -0.20036634802818298, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 5.992, | |
| "grad_norm": 94.16134643554688, | |
| "learning_rate": 4.774575140626316e-08, | |
| "logits/chosen": 0.28719452023506165, | |
| "logits/rejected": 0.31415650248527527, | |
| "logps/chosen": -80.78883361816406, | |
| "logps/rejected": -83.40714263916016, | |
| "loss": 0.6694, | |
| "rewards/accuracies": 0.4375, | |
| "rewards/chosen": 0.0021561384201049805, | |
| "rewards/margins": 0.0118083655834198, | |
| "rewards/rejected": -0.00965222716331482, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 6.12, | |
| "grad_norm": 89.89035034179688, | |
| "learning_rate": 3.8918018624496286e-08, | |
| "logits/chosen": 0.2381378412246704, | |
| "logits/rejected": 0.24997369945049286, | |
| "logps/chosen": -66.21188354492188, | |
| "logps/rejected": -67.53558349609375, | |
| "loss": 0.6703, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": 0.003077469766139984, | |
| "rewards/margins": 0.0005584284663200378, | |
| "rewards/rejected": 0.0025190412998199463, | |
| "step": 47 | |
| }, | |
| { | |
| "epoch": 6.248, | |
| "grad_norm": 84.92794036865234, | |
| "learning_rate": 3.092332998903416e-08, | |
| "logits/chosen": 0.2564837634563446, | |
| "logits/rejected": 0.28156182169914246, | |
| "logps/chosen": -72.13143157958984, | |
| "logps/rejected": -85.50643157958984, | |
| "loss": 0.6771, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -0.03621651977300644, | |
| "rewards/margins": 0.013661496341228485, | |
| "rewards/rejected": -0.049878016114234924, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 6.376, | |
| "grad_norm": 92.3100357055664, | |
| "learning_rate": 2.379323688349516e-08, | |
| "logits/chosen": 0.2702118158340454, | |
| "logits/rejected": 0.2811765968799591, | |
| "logps/chosen": -80.61731719970703, | |
| "logps/rejected": -95.37781524658203, | |
| "loss": 0.6677, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": 0.03306543827056885, | |
| "rewards/margins": 0.09397777915000916, | |
| "rewards/rejected": -0.06091234087944031, | |
| "step": 49 | |
| }, | |
| { | |
| "epoch": 6.504, | |
| "grad_norm": 83.64833068847656, | |
| "learning_rate": 1.7555878527937163e-08, | |
| "logits/chosen": 0.26621848344802856, | |
| "logits/rejected": 0.2580479383468628, | |
| "logps/chosen": -61.17379379272461, | |
| "logps/rejected": -70.72584533691406, | |
| "loss": 0.6766, | |
| "rewards/accuracies": 0.46875, | |
| "rewards/chosen": 0.011742278933525085, | |
| "rewards/margins": 0.03917151689529419, | |
| "rewards/rejected": -0.027429237961769104, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 6.632, | |
| "grad_norm": 94.73871612548828, | |
| "learning_rate": 1.2235870926211616e-08, | |
| "logits/chosen": 0.21123512089252472, | |
| "logits/rejected": 0.21980169415473938, | |
| "logps/chosen": -55.80116271972656, | |
| "logps/rejected": -61.21021270751953, | |
| "loss": 0.6897, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -0.03383632004261017, | |
| "rewards/margins": 0.027702882885932922, | |
| "rewards/rejected": -0.06153920292854309, | |
| "step": 51 | |
| }, | |
| { | |
| "epoch": 6.76, | |
| "grad_norm": 90.2896499633789, | |
| "learning_rate": 7.85420971784223e-09, | |
| "logits/chosen": 0.28194403648376465, | |
| "logits/rejected": 0.314169704914093, | |
| "logps/chosen": -83.96858978271484, | |
| "logps/rejected": -84.47467803955078, | |
| "loss": 0.687, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": 0.06711554527282715, | |
| "rewards/margins": 0.007981911301612854, | |
| "rewards/rejected": 0.059133633971214294, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 6.888, | |
| "grad_norm": 86.9649887084961, | |
| "learning_rate": 4.4281873178278475e-09, | |
| "logits/chosen": 0.2565808892250061, | |
| "logits/rejected": 0.26864296197891235, | |
| "logps/chosen": -60.76387405395508, | |
| "logps/rejected": -57.71691131591797, | |
| "loss": 0.6633, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": 0.027324259281158447, | |
| "rewards/margins": 0.03995504975318909, | |
| "rewards/rejected": -0.01263079047203064, | |
| "step": 53 | |
| }, | |
| { | |
| "epoch": 7.016, | |
| "grad_norm": 91.96407318115234, | |
| "learning_rate": 1.9713246713805587e-09, | |
| "logits/chosen": 0.2563447952270508, | |
| "logits/rejected": 0.23585036396980286, | |
| "logps/chosen": -64.38143157958984, | |
| "logps/rejected": -73.07710266113281, | |
| "loss": 0.701, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": 0.0030507892370224, | |
| "rewards/margins": 0.019449278712272644, | |
| "rewards/rejected": -0.016398489475250244, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 7.144, | |
| "grad_norm": 92.31555938720703, | |
| "learning_rate": 4.933178929321102e-10, | |
| "logits/chosen": 0.27761310338974, | |
| "logits/rejected": 0.28139054775238037, | |
| "logps/chosen": -86.69955444335938, | |
| "logps/rejected": -84.17654418945312, | |
| "loss": 0.6593, | |
| "rewards/accuracies": 0.59375, | |
| "rewards/chosen": 0.09960392117500305, | |
| "rewards/margins": 0.13018175959587097, | |
| "rewards/rejected": -0.03057783842086792, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 7.272, | |
| "grad_norm": 88.12805938720703, | |
| "learning_rate": 0.0, | |
| "logits/chosen": 0.28197741508483887, | |
| "logits/rejected": 0.3240779936313629, | |
| "logps/chosen": -61.40129852294922, | |
| "logps/rejected": -71.11226654052734, | |
| "loss": 0.6692, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -0.014597773551940918, | |
| "rewards/margins": 0.04788690805435181, | |
| "rewards/rejected": -0.062484681606292725, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 7.272, | |
| "step": 56, | |
| "total_flos": 0.0, | |
| "train_loss": 0.0, | |
| "train_runtime": 5.7891, | |
| "train_samples_per_second": 138.19, | |
| "train_steps_per_second": 1.382 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 8, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 8, | |
| "save_steps": 400, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |