| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 19.414225941422593, |
| "eval_steps": 500, |
| "global_step": 580, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.33472803347280333, |
| "grad_norm": 0.4676998555660248, |
| "learning_rate": 8.620689655172415e-07, |
| "logits/chosen": 1.7077701091766357, |
| "logits/rejected": 1.8646482229232788, |
| "logps/chosen": -85.7728271484375, |
| "logps/rejected": -88.1952896118164, |
| "loss": 0.6938, |
| "rewards/accuracies": 0.32499998807907104, |
| "rewards/chosen": -0.004770822823047638, |
| "rewards/margins": -0.007881464436650276, |
| "rewards/rejected": 0.0031106427777558565, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.6694560669456067, |
| "grad_norm": 0.43230223655700684, |
| "learning_rate": 1.724137931034483e-06, |
| "logits/chosen": 1.8023220300674438, |
| "logits/rejected": 1.8210970163345337, |
| "logps/chosen": -78.37618255615234, |
| "logps/rejected": -75.44720458984375, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.48750001192092896, |
| "rewards/chosen": -0.0056939031928777695, |
| "rewards/margins": -0.003848772030323744, |
| "rewards/rejected": -0.001845130929723382, |
| "step": 20 |
| }, |
| { |
| "epoch": 1.00418410041841, |
| "grad_norm": 2.431753396987915, |
| "learning_rate": 2.5862068965517246e-06, |
| "logits/chosen": 1.8643144369125366, |
| "logits/rejected": 1.8500900268554688, |
| "logps/chosen": -86.84412384033203, |
| "logps/rejected": -90.78925323486328, |
| "loss": 0.6946, |
| "rewards/accuracies": 0.44999998807907104, |
| "rewards/chosen": -0.0008674233104102314, |
| "rewards/margins": -0.008064134046435356, |
| "rewards/rejected": 0.0071967123076319695, |
| "step": 30 |
| }, |
| { |
| "epoch": 1.3389121338912133, |
| "grad_norm": 0.5327289700508118, |
| "learning_rate": 3.448275862068966e-06, |
| "logits/chosen": 1.7701479196548462, |
| "logits/rejected": 1.7749736309051514, |
| "logps/chosen": -83.61552429199219, |
| "logps/rejected": -72.89176177978516, |
| "loss": 0.6939, |
| "rewards/accuracies": 0.4625000059604645, |
| "rewards/chosen": -0.0019066383829340339, |
| "rewards/margins": -0.0035008196718990803, |
| "rewards/rejected": 0.0015941811725497246, |
| "step": 40 |
| }, |
| { |
| "epoch": 1.6736401673640167, |
| "grad_norm": 0.5017096996307373, |
| "learning_rate": 4.310344827586207e-06, |
| "logits/chosen": 1.7465788125991821, |
| "logits/rejected": 1.7925498485565186, |
| "logps/chosen": -84.85249328613281, |
| "logps/rejected": -88.79469299316406, |
| "loss": 0.6938, |
| "rewards/accuracies": 0.550000011920929, |
| "rewards/chosen": 0.0003656863118521869, |
| "rewards/margins": 0.00012080222222721204, |
| "rewards/rejected": 0.00024488387862220407, |
| "step": 50 |
| }, |
| { |
| "epoch": 2.00836820083682, |
| "grad_norm": 0.4994066059589386, |
| "learning_rate": 4.999818897894192e-06, |
| "logits/chosen": 1.8359416723251343, |
| "logits/rejected": 1.814234733581543, |
| "logps/chosen": -79.98396301269531, |
| "logps/rejected": -66.29866027832031, |
| "loss": 0.6952, |
| "rewards/accuracies": 0.4375, |
| "rewards/chosen": 0.00456579215824604, |
| "rewards/margins": -0.002460189163684845, |
| "rewards/rejected": 0.007025980856269598, |
| "step": 60 |
| }, |
| { |
| "epoch": 2.3430962343096233, |
| "grad_norm": 0.6425362825393677, |
| "learning_rate": 4.9934830787948756e-06, |
| "logits/chosen": 1.7550818920135498, |
| "logits/rejected": 1.7802051305770874, |
| "logps/chosen": -83.60951232910156, |
| "logps/rejected": -70.81080627441406, |
| "loss": 0.6937, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": 0.004059882368892431, |
| "rewards/margins": 0.00932090263813734, |
| "rewards/rejected": -0.005261021666228771, |
| "step": 70 |
| }, |
| { |
| "epoch": 2.6778242677824267, |
| "grad_norm": 0.5591297149658203, |
| "learning_rate": 4.978118375700895e-06, |
| "logits/chosen": 1.7742525339126587, |
| "logits/rejected": 1.8790937662124634, |
| "logps/chosen": -86.7696533203125, |
| "logps/rejected": -84.90727233886719, |
| "loss": 0.6927, |
| "rewards/accuracies": 0.574999988079071, |
| "rewards/chosen": 0.0033963967580348253, |
| "rewards/margins": 0.005500240251421928, |
| "rewards/rejected": -0.0021038432605564594, |
| "step": 80 |
| }, |
| { |
| "epoch": 3.01255230125523, |
| "grad_norm": 0.6774541735649109, |
| "learning_rate": 4.953780424089803e-06, |
| "logits/chosen": 1.8815631866455078, |
| "logits/rejected": 1.911620855331421, |
| "logps/chosen": -92.14398193359375, |
| "logps/rejected": -82.83556365966797, |
| "loss": 0.6923, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": 0.007645039353519678, |
| "rewards/margins": 0.005134005565196276, |
| "rewards/rejected": 0.0025110342539846897, |
| "step": 90 |
| }, |
| { |
| "epoch": 3.3472803347280333, |
| "grad_norm": 0.6908814907073975, |
| "learning_rate": 4.920557351506409e-06, |
| "logits/chosen": 1.7718560695648193, |
| "logits/rejected": 1.8823477029800415, |
| "logps/chosen": -91.2622299194336, |
| "logps/rejected": -96.467529296875, |
| "loss": 0.6894, |
| "rewards/accuracies": 0.6000000238418579, |
| "rewards/chosen": 0.004941435065120459, |
| "rewards/margins": 0.01582186669111252, |
| "rewards/rejected": -0.010880433022975922, |
| "step": 100 |
| }, |
| { |
| "epoch": 3.6820083682008367, |
| "grad_norm": 0.6954344511032104, |
| "learning_rate": 4.878569458453592e-06, |
| "logits/chosen": 1.6916942596435547, |
| "logits/rejected": 1.7603965997695923, |
| "logps/chosen": -84.66748046875, |
| "logps/rejected": -103.5328369140625, |
| "loss": 0.6889, |
| "rewards/accuracies": 0.5375000238418579, |
| "rewards/chosen": -0.003184904810041189, |
| "rewards/margins": 0.007628746330738068, |
| "rewards/rejected": -0.01081365067511797, |
| "step": 110 |
| }, |
| { |
| "epoch": 4.01673640167364, |
| "grad_norm": 0.6846771240234375, |
| "learning_rate": 4.827968782785062e-06, |
| "logits/chosen": 1.8839175701141357, |
| "logits/rejected": 1.956974744796753, |
| "logps/chosen": -76.89198303222656, |
| "logps/rejected": -95.03202056884766, |
| "loss": 0.6886, |
| "rewards/accuracies": 0.6625000238418579, |
| "rewards/chosen": 0.009448022581636906, |
| "rewards/margins": 0.018458742648363113, |
| "rewards/rejected": -0.009010720066726208, |
| "step": 120 |
| }, |
| { |
| "epoch": 4.351464435146443, |
| "grad_norm": 0.762366771697998, |
| "learning_rate": 4.7689385491773934e-06, |
| "logits/chosen": 1.8294252157211304, |
| "logits/rejected": 1.8144447803497314, |
| "logps/chosen": -87.03471374511719, |
| "logps/rejected": -73.86766052246094, |
| "loss": 0.6847, |
| "rewards/accuracies": 0.612500011920929, |
| "rewards/chosen": -0.01010747067630291, |
| "rewards/margins": 0.01468165498226881, |
| "rewards/rejected": -0.024789121001958847, |
| "step": 130 |
| }, |
| { |
| "epoch": 4.686192468619247, |
| "grad_norm": 0.7172746658325195, |
| "learning_rate": 4.70169250567482e-06, |
| "logits/chosen": 1.7118749618530273, |
| "logits/rejected": 1.8259985446929932, |
| "logps/chosen": -68.12081146240234, |
| "logps/rejected": -75.31085205078125, |
| "loss": 0.6809, |
| "rewards/accuracies": 0.574999988079071, |
| "rewards/chosen": -0.014076923951506615, |
| "rewards/margins": 0.014298361726105213, |
| "rewards/rejected": -0.028375286608934402, |
| "step": 140 |
| }, |
| { |
| "epoch": 5.02092050209205, |
| "grad_norm": 0.7188512682914734, |
| "learning_rate": 4.626474149709127e-06, |
| "logits/chosen": 1.8864549398422241, |
| "logits/rejected": 1.8488889932632446, |
| "logps/chosen": -101.3166275024414, |
| "logps/rejected": -76.73490905761719, |
| "loss": 0.6773, |
| "rewards/accuracies": 0.5375000238418579, |
| "rewards/chosen": -0.021979983896017075, |
| "rewards/margins": 0.023767748847603798, |
| "rewards/rejected": -0.04574773460626602, |
| "step": 150 |
| }, |
| { |
| "epoch": 5.355648535564853, |
| "grad_norm": 0.8493714928627014, |
| "learning_rate": 4.54355584639723e-06, |
| "logits/chosen": 1.7861169576644897, |
| "logits/rejected": 1.7927452325820923, |
| "logps/chosen": -83.28593444824219, |
| "logps/rejected": -76.04238891601562, |
| "loss": 0.673, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -0.0320693664252758, |
| "rewards/margins": 0.04263792932033539, |
| "rewards/rejected": -0.07470729202032089, |
| "step": 160 |
| }, |
| { |
| "epoch": 5.690376569037657, |
| "grad_norm": 1.1378726959228516, |
| "learning_rate": 4.45323784230908e-06, |
| "logits/chosen": 1.8622329235076904, |
| "logits/rejected": 1.875741958618164, |
| "logps/chosen": -72.01219177246094, |
| "logps/rejected": -77.62818908691406, |
| "loss": 0.6679, |
| "rewards/accuracies": 0.637499988079071, |
| "rewards/chosen": -0.05134737491607666, |
| "rewards/margins": 0.049249742180109024, |
| "rewards/rejected": -0.10059712082147598, |
| "step": 170 |
| }, |
| { |
| "epoch": 6.02510460251046, |
| "grad_norm": 0.847227156162262, |
| "learning_rate": 4.355847178277025e-06, |
| "logits/chosen": 1.9445765018463135, |
| "logits/rejected": 2.0065901279449463, |
| "logps/chosen": -76.14964294433594, |
| "logps/rejected": -83.71626281738281, |
| "loss": 0.6621, |
| "rewards/accuracies": 0.5874999761581421, |
| "rewards/chosen": -0.05519520118832588, |
| "rewards/margins": 0.06803703308105469, |
| "rewards/rejected": -0.12323222309350967, |
| "step": 180 |
| }, |
| { |
| "epoch": 6.359832635983263, |
| "grad_norm": 0.8368040919303894, |
| "learning_rate": 4.2517365051833564e-06, |
| "logits/chosen": 1.8116003274917603, |
| "logits/rejected": 1.8839362859725952, |
| "logps/chosen": -82.7087631225586, |
| "logps/rejected": -67.3785629272461, |
| "loss": 0.6538, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.03855596110224724, |
| "rewards/margins": 0.11562293767929077, |
| "rewards/rejected": -0.15417888760566711, |
| "step": 190 |
| }, |
| { |
| "epoch": 6.694560669456067, |
| "grad_norm": 0.8636891841888428, |
| "learning_rate": 4.141282807014034e-06, |
| "logits/chosen": 1.7250430583953857, |
| "logits/rejected": 1.775418996810913, |
| "logps/chosen": -68.51094055175781, |
| "logps/rejected": -79.88996124267578, |
| "loss": 0.657, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": -0.11151214689016342, |
| "rewards/margins": 0.07863004505634308, |
| "rewards/rejected": -0.1901421844959259, |
| "step": 200 |
| }, |
| { |
| "epoch": 7.02928870292887, |
| "grad_norm": 0.945520281791687, |
| "learning_rate": 4.024886035802432e-06, |
| "logits/chosen": 1.7329381704330444, |
| "logits/rejected": 1.799551010131836, |
| "logps/chosen": -81.49295043945312, |
| "logps/rejected": -87.97386169433594, |
| "loss": 0.6462, |
| "rewards/accuracies": 0.762499988079071, |
| "rewards/chosen": -0.09862186014652252, |
| "rewards/margins": 0.10664049535989761, |
| "rewards/rejected": -0.20526234805583954, |
| "step": 210 |
| }, |
| { |
| "epoch": 7.364016736401673, |
| "grad_norm": 0.9736716151237488, |
| "learning_rate": 3.9029676634059565e-06, |
| "logits/chosen": 1.8787791728973389, |
| "logits/rejected": 1.8832632303237915, |
| "logps/chosen": -83.21014404296875, |
| "logps/rejected": -76.4449691772461, |
| "loss": 0.6413, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.07923021167516708, |
| "rewards/margins": 0.11194779723882675, |
| "rewards/rejected": -0.19117799401283264, |
| "step": 220 |
| }, |
| { |
| "epoch": 7.698744769874477, |
| "grad_norm": 0.9238549470901489, |
| "learning_rate": 3.7759691553595214e-06, |
| "logits/chosen": 1.9044986963272095, |
| "logits/rejected": 1.8805242776870728, |
| "logps/chosen": -90.17919158935547, |
| "logps/rejected": -86.05476379394531, |
| "loss": 0.6346, |
| "rewards/accuracies": 0.675000011920929, |
| "rewards/chosen": -0.0993318110704422, |
| "rewards/margins": 0.12289313971996307, |
| "rewards/rejected": -0.22222498059272766, |
| "step": 230 |
| }, |
| { |
| "epoch": 8.03347280334728, |
| "grad_norm": 1.0052326917648315, |
| "learning_rate": 3.6443503723320837e-06, |
| "logits/chosen": 1.873815894126892, |
| "logits/rejected": 1.8809674978256226, |
| "logps/chosen": -74.86824035644531, |
| "logps/rejected": -81.79768371582031, |
| "loss": 0.6377, |
| "rewards/accuracies": 0.612500011920929, |
| "rewards/chosen": -0.17602138221263885, |
| "rewards/margins": 0.09737586975097656, |
| "rewards/rejected": -0.2733972668647766, |
| "step": 240 |
| }, |
| { |
| "epoch": 8.368200836820083, |
| "grad_norm": 1.0167930126190186, |
| "learning_rate": 3.508587904974522e-06, |
| "logits/chosen": 1.868101716041565, |
| "logits/rejected": 1.8685039281845093, |
| "logps/chosen": -94.48133850097656, |
| "logps/rejected": -94.00556182861328, |
| "loss": 0.6216, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": -0.1260211318731308, |
| "rewards/margins": 0.21002164483070374, |
| "rewards/rejected": -0.3360427916049957, |
| "step": 250 |
| }, |
| { |
| "epoch": 8.702928870292887, |
| "grad_norm": 0.9955905079841614, |
| "learning_rate": 3.3691733481883693e-06, |
| "logits/chosen": 1.7905946969985962, |
| "logits/rejected": 1.7784850597381592, |
| "logps/chosen": -94.9794921875, |
| "logps/rejected": -84.99113464355469, |
| "loss": 0.6193, |
| "rewards/accuracies": 0.6625000238418579, |
| "rewards/chosen": -0.18853916227817535, |
| "rewards/margins": 0.19185101985931396, |
| "rewards/rejected": -0.3803902268409729, |
| "step": 260 |
| }, |
| { |
| "epoch": 9.03765690376569, |
| "grad_norm": 0.943289577960968, |
| "learning_rate": 3.226611521064278e-06, |
| "logits/chosen": 1.8119779825210571, |
| "logits/rejected": 1.817567229270935, |
| "logps/chosen": -80.78002166748047, |
| "logps/rejected": -85.42848205566406, |
| "loss": 0.6201, |
| "rewards/accuracies": 0.7124999761581421, |
| "rewards/chosen": -0.16673071682453156, |
| "rewards/margins": 0.19530437886714935, |
| "rewards/rejected": -0.3620350658893585, |
| "step": 270 |
| }, |
| { |
| "epoch": 9.372384937238493, |
| "grad_norm": 0.9868927597999573, |
| "learning_rate": 3.0814186389357765e-06, |
| "logits/chosen": 1.816300392150879, |
| "logits/rejected": 1.7763961553573608, |
| "logps/chosen": -77.19395446777344, |
| "logps/rejected": -68.16056060791016, |
| "loss": 0.6157, |
| "rewards/accuracies": 0.6625000238418579, |
| "rewards/chosen": -0.21062004566192627, |
| "rewards/margins": 0.19489461183547974, |
| "rewards/rejected": -0.4055147171020508, |
| "step": 280 |
| }, |
| { |
| "epoch": 9.707112970711297, |
| "grad_norm": 1.0557409524917603, |
| "learning_rate": 2.9341204441673267e-06, |
| "logits/chosen": 1.8154022693634033, |
| "logits/rejected": 1.842792272567749, |
| "logps/chosen": -84.95299530029297, |
| "logps/rejected": -82.22926330566406, |
| "loss": 0.5972, |
| "rewards/accuracies": 0.7875000238418579, |
| "rewards/chosen": -0.25716403126716614, |
| "rewards/margins": 0.2636396288871765, |
| "rewards/rejected": -0.5208036303520203, |
| "step": 290 |
| }, |
| { |
| "epoch": 10.0418410041841, |
| "grad_norm": 1.0787400007247925, |
| "learning_rate": 2.785250302445062e-06, |
| "logits/chosen": 1.8255424499511719, |
| "logits/rejected": 1.8123111724853516, |
| "logps/chosen": -94.89508056640625, |
| "logps/rejected": -99.48709869384766, |
| "loss": 0.6038, |
| "rewards/accuracies": 0.7875000238418579, |
| "rewards/chosen": -0.25526899099349976, |
| "rewards/margins": 0.24981728196144104, |
| "rewards/rejected": -0.5050862431526184, |
| "step": 300 |
| }, |
| { |
| "epoch": 10.376569037656903, |
| "grad_norm": 0.9934303164482117, |
| "learning_rate": 2.6353472714635443e-06, |
| "logits/chosen": 1.6809011697769165, |
| "logits/rejected": 1.6883894205093384, |
| "logps/chosen": -83.52108001708984, |
| "logps/rejected": -89.83765411376953, |
| "loss": 0.5897, |
| "rewards/accuracies": 0.762499988079071, |
| "rewards/chosen": -0.20843425393104553, |
| "rewards/margins": 0.2531076967716217, |
| "rewards/rejected": -0.46154195070266724, |
| "step": 310 |
| }, |
| { |
| "epoch": 10.711297071129707, |
| "grad_norm": 1.0989857912063599, |
| "learning_rate": 2.4849541490017868e-06, |
| "logits/chosen": 1.8184492588043213, |
| "logits/rejected": 1.8702239990234375, |
| "logps/chosen": -76.5416030883789, |
| "logps/rejected": -87.98771667480469, |
| "loss": 0.5951, |
| "rewards/accuracies": 0.7124999761581421, |
| "rewards/chosen": -0.26176151633262634, |
| "rewards/margins": 0.17908410727977753, |
| "rewards/rejected": -0.44084563851356506, |
| "step": 320 |
| }, |
| { |
| "epoch": 11.04602510460251, |
| "grad_norm": 1.1377506256103516, |
| "learning_rate": 2.3346155074564712e-06, |
| "logits/chosen": 1.7781150341033936, |
| "logits/rejected": 1.7945045232772827, |
| "logps/chosen": -94.21954345703125, |
| "logps/rejected": -87.9439926147461, |
| "loss": 0.5895, |
| "rewards/accuracies": 0.762499988079071, |
| "rewards/chosen": -0.3070668578147888, |
| "rewards/margins": 0.26864537596702576, |
| "rewards/rejected": -0.5757122039794922, |
| "step": 330 |
| }, |
| { |
| "epoch": 11.380753138075313, |
| "grad_norm": 1.1378768682479858, |
| "learning_rate": 2.184875721949277e-06, |
| "logits/chosen": 1.7889184951782227, |
| "logits/rejected": 1.7863588333129883, |
| "logps/chosen": -77.19974517822266, |
| "logps/rejected": -74.92039489746094, |
| "loss": 0.577, |
| "rewards/accuracies": 0.8374999761581421, |
| "rewards/chosen": -0.2269454449415207, |
| "rewards/margins": 0.3004661798477173, |
| "rewards/rejected": -0.5274116396903992, |
| "step": 340 |
| }, |
| { |
| "epoch": 11.715481171548117, |
| "grad_norm": 1.0102859735488892, |
| "learning_rate": 2.0362769991485514e-06, |
| "logits/chosen": 1.6868798732757568, |
| "logits/rejected": 1.743287444114685, |
| "logps/chosen": -80.24299621582031, |
| "logps/rejected": -91.3736343383789, |
| "loss": 0.5825, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": -0.2850210964679718, |
| "rewards/margins": 0.30632922053337097, |
| "rewards/rejected": -0.5913503170013428, |
| "step": 350 |
| }, |
| { |
| "epoch": 12.05020920502092, |
| "grad_norm": 1.0598143339157104, |
| "learning_rate": 1.8893574139429226e-06, |
| "logits/chosen": 1.6859172582626343, |
| "logits/rejected": 1.7635313272476196, |
| "logps/chosen": -88.87372589111328, |
| "logps/rejected": -93.02122497558594, |
| "loss": 0.5819, |
| "rewards/accuracies": 0.7749999761581421, |
| "rewards/chosen": -0.317017138004303, |
| "rewards/margins": 0.2608889639377594, |
| "rewards/rejected": -0.5779060125350952, |
| "step": 360 |
| }, |
| { |
| "epoch": 12.384937238493723, |
| "grad_norm": 1.0845669507980347, |
| "learning_rate": 1.744648961076068e-06, |
| "logits/chosen": 1.7980632781982422, |
| "logits/rejected": 1.8205795288085938, |
| "logps/chosen": -83.31995391845703, |
| "logps/rejected": -87.28046417236328, |
| "loss": 0.5737, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.3205794394016266, |
| "rewards/margins": 0.24214692413806915, |
| "rewards/rejected": -0.5627263784408569, |
| "step": 370 |
| }, |
| { |
| "epoch": 12.719665271966527, |
| "grad_norm": 1.180468201637268, |
| "learning_rate": 1.602675628797636e-06, |
| "logits/chosen": 1.8093292713165283, |
| "logits/rejected": 1.8195394277572632, |
| "logps/chosen": -99.64051055908203, |
| "logps/rejected": -82.13116455078125, |
| "loss": 0.5655, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -0.28717803955078125, |
| "rewards/margins": 0.355060875415802, |
| "rewards/rejected": -0.6422389149665833, |
| "step": 380 |
| }, |
| { |
| "epoch": 13.05439330543933, |
| "grad_norm": 1.0443357229232788, |
| "learning_rate": 1.4639515015056205e-06, |
| "logits/chosen": 1.7365925312042236, |
| "logits/rejected": 1.7514903545379639, |
| "logps/chosen": -96.92813110351562, |
| "logps/rejected": -83.5425796508789, |
| "loss": 0.5654, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": -0.3064219653606415, |
| "rewards/margins": 0.38930463790893555, |
| "rewards/rejected": -0.6957265138626099, |
| "step": 390 |
| }, |
| { |
| "epoch": 13.389121338912133, |
| "grad_norm": 1.2010602951049805, |
| "learning_rate": 1.328978898250525e-06, |
| "logits/chosen": 1.7649974822998047, |
| "logits/rejected": 1.7864068746566772, |
| "logps/chosen": -78.26787567138672, |
| "logps/rejected": -82.81205749511719, |
| "loss": 0.5713, |
| "rewards/accuracies": 0.7749999761581421, |
| "rewards/chosen": -0.32541388273239136, |
| "rewards/margins": 0.34498968720436096, |
| "rewards/rejected": -0.6704035997390747, |
| "step": 400 |
| }, |
| { |
| "epoch": 13.723849372384937, |
| "grad_norm": 1.656805396080017, |
| "learning_rate": 1.198246553841744e-06, |
| "logits/chosen": 1.6543798446655273, |
| "logits/rejected": 1.700377106666565, |
| "logps/chosen": -88.14425659179688, |
| "logps/rejected": -86.7034912109375, |
| "loss": 0.5569, |
| "rewards/accuracies": 0.824999988079071, |
| "rewards/chosen": -0.307409405708313, |
| "rewards/margins": 0.346424400806427, |
| "rewards/rejected": -0.65383380651474, |
| "step": 410 |
| }, |
| { |
| "epoch": 14.05857740585774, |
| "grad_norm": 1.3017597198486328, |
| "learning_rate": 1.0722278491423998e-06, |
| "logits/chosen": 1.861400842666626, |
| "logits/rejected": 1.7919048070907593, |
| "logps/chosen": -101.43531036376953, |
| "logps/rejected": -77.99034881591797, |
| "loss": 0.567, |
| "rewards/accuracies": 0.675000011920929, |
| "rewards/chosen": -0.34600889682769775, |
| "rewards/margins": 0.2590746283531189, |
| "rewards/rejected": -0.6050835251808167, |
| "step": 420 |
| }, |
| { |
| "epoch": 14.393305439330543, |
| "grad_norm": 1.0269688367843628, |
| "learning_rate": 9.513790969606926e-07, |
| "logits/chosen": 1.809483289718628, |
| "logits/rejected": 1.8456366062164307, |
| "logps/chosen": -102.51566314697266, |
| "logps/rejected": -104.2486343383789, |
| "loss": 0.5605, |
| "rewards/accuracies": 0.737500011920929, |
| "rewards/chosen": -0.33636271953582764, |
| "rewards/margins": 0.3720197379589081, |
| "rewards/rejected": -0.7083825469017029, |
| "step": 430 |
| }, |
| { |
| "epoch": 14.728033472803347, |
| "grad_norm": 1.0176596641540527, |
| "learning_rate": 8.361378897445643e-07, |
| "logits/chosen": 1.8125699758529663, |
| "logits/rejected": 1.819248914718628, |
| "logps/chosen": -83.56723022460938, |
| "logps/rejected": -81.31576538085938, |
| "loss": 0.5549, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.321399986743927, |
| "rewards/margins": 0.2679772973060608, |
| "rewards/rejected": -0.5893772840499878, |
| "step": 440 |
| }, |
| { |
| "epoch": 15.06276150627615, |
| "grad_norm": 1.1313916444778442, |
| "learning_rate": 7.269215150626391e-07, |
| "logits/chosen": 1.7356494665145874, |
| "logits/rejected": 1.793581247329712, |
| "logps/chosen": -80.0768051147461, |
| "logps/rejected": -79.23683166503906, |
| "loss": 0.5606, |
| "rewards/accuracies": 0.7875000238418579, |
| "rewards/chosen": -0.34324222803115845, |
| "rewards/margins": 0.3662562370300293, |
| "rewards/rejected": -0.7094983458518982, |
| "step": 450 |
| }, |
| { |
| "epoch": 15.397489539748953, |
| "grad_norm": 1.1418852806091309, |
| "learning_rate": 6.241254446089942e-07, |
| "logits/chosen": 1.7430709600448608, |
| "logits/rejected": 1.7953869104385376, |
| "logps/chosen": -82.4347152709961, |
| "logps/rejected": -82.4286880493164, |
| "loss": 0.5545, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": -0.3615425229072571, |
| "rewards/margins": 0.35559993982315063, |
| "rewards/rejected": -0.7171424031257629, |
| "step": 460 |
| }, |
| { |
| "epoch": 15.732217573221757, |
| "grad_norm": 1.1025134325027466, |
| "learning_rate": 5.281219022030423e-07, |
| "logits/chosen": 1.655470848083496, |
| "logits/rejected": 1.692399024963379, |
| "logps/chosen": -101.62381744384766, |
| "logps/rejected": -96.6092300415039, |
| "loss": 0.5525, |
| "rewards/accuracies": 0.7749999761581421, |
| "rewards/chosen": -0.29976749420166016, |
| "rewards/margins": 0.3946015238761902, |
| "rewards/rejected": -0.6943690776824951, |
| "step": 470 |
| }, |
| { |
| "epoch": 16.06694560669456, |
| "grad_norm": 1.2202156782150269, |
| "learning_rate": 4.392585159698087e-07, |
| "logits/chosen": 1.7376701831817627, |
| "logits/rejected": 1.793471097946167, |
| "logps/chosen": -89.61154174804688, |
| "logps/rejected": -94.71632385253906, |
| "loss": 0.5499, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": -0.31332069635391235, |
| "rewards/margins": 0.4189217984676361, |
| "rewards/rejected": -0.7322424650192261, |
| "step": 480 |
| }, |
| { |
| "epoch": 16.401673640167363, |
| "grad_norm": 1.1149810552597046, |
| "learning_rate": 3.578570595810274e-07, |
| "logits/chosen": 1.7718391418457031, |
| "logits/rejected": 1.8273475170135498, |
| "logps/chosen": -83.96971130371094, |
| "logps/rejected": -92.80200958251953, |
| "loss": 0.5471, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": -0.2852213978767395, |
| "rewards/margins": 0.3351207673549652, |
| "rewards/rejected": -0.6203421354293823, |
| "step": 490 |
| }, |
| { |
| "epoch": 16.736401673640167, |
| "grad_norm": 1.0757339000701904, |
| "learning_rate": 2.8421228711503127e-07, |
| "logits/chosen": 1.7726856470108032, |
| "logits/rejected": 1.836022973060608, |
| "logps/chosen": -78.27845764160156, |
| "logps/rejected": -83.48805236816406, |
| "loss": 0.5539, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": -0.34817034006118774, |
| "rewards/margins": 0.2852809429168701, |
| "rewards/rejected": -0.6334512829780579, |
| "step": 500 |
| }, |
| { |
| "epoch": 17.07112970711297, |
| "grad_norm": 1.1488370895385742, |
| "learning_rate": 2.1859086575439225e-07, |
| "logits/chosen": 1.7012341022491455, |
| "logits/rejected": 1.78024423122406, |
| "logps/chosen": -84.58718872070312, |
| "logps/rejected": -86.95068359375, |
| "loss": 0.549, |
| "rewards/accuracies": 0.887499988079071, |
| "rewards/chosen": -0.291012704372406, |
| "rewards/margins": 0.4876977801322937, |
| "rewards/rejected": -0.7787104845046997, |
| "step": 510 |
| }, |
| { |
| "epoch": 17.405857740585773, |
| "grad_norm": 1.1710774898529053, |
| "learning_rate": 1.6123041018599766e-07, |
| "logits/chosen": 1.7162996530532837, |
| "logits/rejected": 1.7306814193725586, |
| "logps/chosen": -93.86546325683594, |
| "logps/rejected": -84.5029067993164, |
| "loss": 0.5567, |
| "rewards/accuracies": 0.8374999761581421, |
| "rewards/chosen": -0.26384133100509644, |
| "rewards/margins": 0.3909001052379608, |
| "rewards/rejected": -0.6547414064407349, |
| "step": 520 |
| }, |
| { |
| "epoch": 17.740585774058577, |
| "grad_norm": 1.0457745790481567, |
| "learning_rate": 1.1233862220001168e-07, |
| "logits/chosen": 1.729026436805725, |
| "logits/rejected": 1.7938979864120483, |
| "logps/chosen": -86.14263916015625, |
| "logps/rejected": -109.89949798583984, |
| "loss": 0.555, |
| "rewards/accuracies": 0.824999988079071, |
| "rewards/chosen": -0.37593385577201843, |
| "rewards/margins": 0.41020965576171875, |
| "rewards/rejected": -0.7861436009407043, |
| "step": 530 |
| }, |
| { |
| "epoch": 18.07531380753138, |
| "grad_norm": 1.1671756505966187, |
| "learning_rate": 7.209253860320897e-08, |
| "logits/chosen": 1.797410011291504, |
| "logits/rejected": 1.7258751392364502, |
| "logps/chosen": -101.58478546142578, |
| "logps/rejected": -81.59870910644531, |
| "loss": 0.5493, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -0.36429405212402344, |
| "rewards/margins": 0.3445179760456085, |
| "rewards/rejected": -0.7088119983673096, |
| "step": 540 |
| }, |
| { |
| "epoch": 18.410041841004183, |
| "grad_norm": 1.0843878984451294, |
| "learning_rate": 4.063789016999331e-08, |
| "logits/chosen": 1.7526544332504272, |
| "logits/rejected": 1.7694238424301147, |
| "logps/chosen": -91.79996490478516, |
| "logps/rejected": -88.14305877685547, |
| "loss": 0.5472, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": -0.432400643825531, |
| "rewards/margins": 0.34698057174682617, |
| "rewards/rejected": -0.7793812155723572, |
| "step": 550 |
| }, |
| { |
| "epoch": 18.744769874476987, |
| "grad_norm": 1.1687026023864746, |
| "learning_rate": 1.808857395232788e-08, |
| "logits/chosen": 1.8021119832992554, |
| "logits/rejected": 1.8404823541641235, |
| "logps/chosen": -101.54361724853516, |
| "logps/rejected": -102.21680450439453, |
| "loss": 0.5574, |
| "rewards/accuracies": 0.824999988079071, |
| "rewards/chosen": -0.32780179381370544, |
| "rewards/margins": 0.34786924719810486, |
| "rewards/rejected": -0.6756710410118103, |
| "step": 560 |
| }, |
| { |
| "epoch": 19.07949790794979, |
| "grad_norm": 1.3505330085754395, |
| "learning_rate": 4.526240859345499e-09, |
| "logits/chosen": 1.7214359045028687, |
| "logits/rejected": 1.7881813049316406, |
| "logps/chosen": -74.24882507324219, |
| "logps/rejected": -99.3295669555664, |
| "loss": 0.5463, |
| "rewards/accuracies": 0.7749999761581421, |
| "rewards/chosen": -0.4312325417995453, |
| "rewards/margins": 0.3562263250350952, |
| "rewards/rejected": -0.7874588966369629, |
| "step": 570 |
| }, |
| { |
| "epoch": 19.414225941422593, |
| "grad_norm": 1.1412781476974487, |
| "learning_rate": 0.0, |
| "logits/chosen": 1.8473981618881226, |
| "logits/rejected": 1.784250259399414, |
| "logps/chosen": -86.49626159667969, |
| "logps/rejected": -79.94244384765625, |
| "loss": 0.5464, |
| "rewards/accuracies": 0.824999988079071, |
| "rewards/chosen": -0.3426724970340729, |
| "rewards/margins": 0.37268632650375366, |
| "rewards/rejected": -0.7153588533401489, |
| "step": 580 |
| }, |
| { |
| "epoch": 19.414225941422593, |
| "step": 580, |
| "total_flos": 1.9755070908268544e+18, |
| "train_loss": 0.6148436521661693, |
| "train_runtime": 4276.8425, |
| "train_samples_per_second": 8.922, |
| "train_steps_per_second": 0.136 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 580, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 20, |
| "save_steps": 50, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.9755070908268544e+18, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|