| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.9995638901003053, | |
| "eval_steps": 100, | |
| "global_step": 573, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0017444395987788923, | |
| "grad_norm": 29.747415913018244, | |
| "learning_rate": 8.620689655172413e-09, | |
| "logits/chosen": -2.9494614601135254, | |
| "logits/rejected": -2.9794743061065674, | |
| "logps/chosen": -1.7114274501800537, | |
| "logps/rejected": -1.7004308700561523, | |
| "loss": 1.5625, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.01744439598778892, | |
| "grad_norm": 25.590469762768826, | |
| "learning_rate": 8.620689655172414e-08, | |
| "logits/chosen": -2.272578477859497, | |
| "logits/rejected": -2.341148614883423, | |
| "logps/chosen": -1.5401281118392944, | |
| "logps/rejected": -1.708099126815796, | |
| "loss": 1.5627, | |
| "rewards/accuracies": 0.4513888955116272, | |
| "rewards/chosen": 0.000227387499762699, | |
| "rewards/margins": 0.0002251576370326802, | |
| "rewards/rejected": 2.2298513613350224e-06, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.03488879197557784, | |
| "grad_norm": 25.207047594807733, | |
| "learning_rate": 1.7241379310344828e-07, | |
| "logits/chosen": -2.485241413116455, | |
| "logits/rejected": -2.5959410667419434, | |
| "logps/chosen": -1.570024013519287, | |
| "logps/rejected": -1.7283653020858765, | |
| "loss": 1.5607, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -0.00038363353814929724, | |
| "rewards/margins": -9.226792826666497e-06, | |
| "rewards/rejected": -0.0003744067216757685, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.05233318796336677, | |
| "grad_norm": 31.534894880772583, | |
| "learning_rate": 2.586206896551724e-07, | |
| "logits/chosen": -2.1619415283203125, | |
| "logits/rejected": -2.2504525184631348, | |
| "logps/chosen": -1.6232503652572632, | |
| "logps/rejected": -1.7250868082046509, | |
| "loss": 1.5432, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -0.0028999350033700466, | |
| "rewards/margins": 0.0026171673089265823, | |
| "rewards/rejected": -0.005517102312296629, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.06977758395115569, | |
| "grad_norm": 26.41926451522702, | |
| "learning_rate": 3.4482758620689656e-07, | |
| "logits/chosen": -2.274902582168579, | |
| "logits/rejected": -2.4139533042907715, | |
| "logps/chosen": -1.5764671564102173, | |
| "logps/rejected": -1.6893203258514404, | |
| "loss": 1.509, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": -0.007333276327699423, | |
| "rewards/margins": 0.00940671842545271, | |
| "rewards/rejected": -0.016739998012781143, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.08722197993894461, | |
| "grad_norm": 24.178200461155868, | |
| "learning_rate": 4.310344827586206e-07, | |
| "logits/chosen": -2.3129639625549316, | |
| "logits/rejected": -2.319011926651001, | |
| "logps/chosen": -1.5989129543304443, | |
| "logps/rejected": -1.7941827774047852, | |
| "loss": 1.4196, | |
| "rewards/accuracies": 0.737500011920929, | |
| "rewards/chosen": -0.026371877640485764, | |
| "rewards/margins": 0.03227262943983078, | |
| "rewards/rejected": -0.058644503355026245, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.10466637592673354, | |
| "grad_norm": 38.93662482016604, | |
| "learning_rate": 4.999813941333237e-07, | |
| "logits/chosen": -2.455597400665283, | |
| "logits/rejected": -2.459132194519043, | |
| "logps/chosen": -1.9030494689941406, | |
| "logps/rejected": -2.0835158824920654, | |
| "loss": 1.3361, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.10062487423419952, | |
| "rewards/margins": 0.051896799355745316, | |
| "rewards/rejected": -0.15252165496349335, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.12211077191452246, | |
| "grad_norm": 35.7973285976575, | |
| "learning_rate": 4.99330479543236e-07, | |
| "logits/chosen": -2.394988536834717, | |
| "logits/rejected": -2.4527833461761475, | |
| "logps/chosen": -1.8048909902572632, | |
| "logps/rejected": -2.0864245891571045, | |
| "loss": 1.3198, | |
| "rewards/accuracies": 0.668749988079071, | |
| "rewards/chosen": -0.11194689571857452, | |
| "rewards/margins": 0.057884473353624344, | |
| "rewards/rejected": -0.16983136534690857, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.13955516790231137, | |
| "grad_norm": 40.16495399833959, | |
| "learning_rate": 4.977520391931686e-07, | |
| "logits/chosen": -2.266960620880127, | |
| "logits/rejected": -2.373418092727661, | |
| "logps/chosen": -1.8538503646850586, | |
| "logps/rejected": -2.1406590938568115, | |
| "loss": 1.2319, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.1073283776640892, | |
| "rewards/margins": 0.09412004053592682, | |
| "rewards/rejected": -0.2014484405517578, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.1569995638901003, | |
| "grad_norm": 46.075075778371485, | |
| "learning_rate": 4.952519449848961e-07, | |
| "logits/chosen": -2.337573289871216, | |
| "logits/rejected": -2.3125760555267334, | |
| "logps/chosen": -2.0118062496185303, | |
| "logps/rejected": -2.2410480976104736, | |
| "loss": 1.187, | |
| "rewards/accuracies": 0.7562500238418579, | |
| "rewards/chosen": -0.15836110711097717, | |
| "rewards/margins": 0.09043124318122864, | |
| "rewards/rejected": -0.248792365193367, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.17444395987788922, | |
| "grad_norm": 35.52679424110663, | |
| "learning_rate": 4.9183949743308e-07, | |
| "logits/chosen": -2.266244411468506, | |
| "logits/rejected": -2.32613205909729, | |
| "logps/chosen": -1.9573999643325806, | |
| "logps/rejected": -2.245410919189453, | |
| "loss": 1.1749, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -0.15664055943489075, | |
| "rewards/margins": 0.1040625348687172, | |
| "rewards/rejected": -0.26070311665534973, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.17444395987788922, | |
| "eval_logits/chosen": -2.5133261680603027, | |
| "eval_logits/rejected": -2.567593812942505, | |
| "eval_logps/chosen": -2.063753604888916, | |
| "eval_logps/rejected": -2.446528673171997, | |
| "eval_loss": 1.0763452053070068, | |
| "eval_rewards/accuracies": 0.7891566157341003, | |
| "eval_rewards/chosen": -0.17315217852592468, | |
| "eval_rewards/margins": 0.1388266682624817, | |
| "eval_rewards/rejected": -0.31197884678840637, | |
| "eval_runtime": 115.6498, | |
| "eval_samples_per_second": 22.853, | |
| "eval_steps_per_second": 0.718, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.19188835586567815, | |
| "grad_norm": 34.94980526994547, | |
| "learning_rate": 4.875273910667434e-07, | |
| "logits/chosen": -2.5348660945892334, | |
| "logits/rejected": -2.541141986846924, | |
| "logps/chosen": -2.0890326499938965, | |
| "logps/rejected": -2.563629627227783, | |
| "loss": 1.0846, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -0.21275702118873596, | |
| "rewards/margins": 0.15415263175964355, | |
| "rewards/rejected": -0.3669096529483795, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.2093327518534671, | |
| "grad_norm": 36.111192447827186, | |
| "learning_rate": 4.823316672047889e-07, | |
| "logits/chosen": -2.482753276824951, | |
| "logits/rejected": -2.542470932006836, | |
| "logps/chosen": -2.19625186920166, | |
| "logps/rejected": -2.7020630836486816, | |
| "loss": 1.0846, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -0.20946833491325378, | |
| "rewards/margins": 0.1580672562122345, | |
| "rewards/rejected": -0.3675355613231659, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.226777147841256, | |
| "grad_norm": 35.9198962249818, | |
| "learning_rate": 4.762716542812394e-07, | |
| "logits/chosen": -2.5084481239318848, | |
| "logits/rejected": -2.484107732772827, | |
| "logps/chosen": -2.347628116607666, | |
| "logps/rejected": -2.8013808727264404, | |
| "loss": 1.0397, | |
| "rewards/accuracies": 0.768750011920929, | |
| "rewards/chosen": -0.2825542390346527, | |
| "rewards/margins": 0.14921075105667114, | |
| "rewards/rejected": -0.43176499009132385, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.24422154382904493, | |
| "grad_norm": 33.812969950120994, | |
| "learning_rate": 4.693698959421934e-07, | |
| "logits/chosen": -2.4390926361083984, | |
| "logits/rejected": -2.5850162506103516, | |
| "logps/chosen": -2.2307400703430176, | |
| "logps/rejected": -2.76652193069458, | |
| "loss": 1.1052, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.2658195495605469, | |
| "rewards/margins": 0.15902426838874817, | |
| "rewards/rejected": -0.42484384775161743, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.26166593981683384, | |
| "grad_norm": 38.15404463326893, | |
| "learning_rate": 4.616520671819811e-07, | |
| "logits/chosen": -2.5228819847106934, | |
| "logits/rejected": -2.5446176528930664, | |
| "logps/chosen": -2.410109043121338, | |
| "logps/rejected": -2.782252550125122, | |
| "loss": 1.0492, | |
| "rewards/accuracies": 0.7437499761581421, | |
| "rewards/chosen": -0.31940045952796936, | |
| "rewards/margins": 0.1365361362695694, | |
| "rewards/rejected": -0.45593661069869995, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.27911033580462274, | |
| "grad_norm": 37.67274567595706, | |
| "learning_rate": 4.531468788304991e-07, | |
| "logits/chosen": -2.496253252029419, | |
| "logits/rejected": -2.5734314918518066, | |
| "logps/chosen": -2.237454414367676, | |
| "logps/rejected": -2.7140257358551025, | |
| "loss": 1.0272, | |
| "rewards/accuracies": 0.7749999761581421, | |
| "rewards/chosen": -0.27850186824798584, | |
| "rewards/margins": 0.15557342767715454, | |
| "rewards/rejected": -0.4340752959251404, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.2965547317924117, | |
| "grad_norm": 49.34543198418806, | |
| "learning_rate": 4.438859707470375e-07, | |
| "logits/chosen": -2.530522108078003, | |
| "logits/rejected": -2.5276732444763184, | |
| "logps/chosen": -2.332202434539795, | |
| "logps/rejected": -2.7523207664489746, | |
| "loss": 1.0392, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": -0.2833098769187927, | |
| "rewards/margins": 0.1833934336900711, | |
| "rewards/rejected": -0.466703325510025, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.3139991277802006, | |
| "grad_norm": 44.17730811384066, | |
| "learning_rate": 4.3390379411792524e-07, | |
| "logits/chosen": -2.4329633712768555, | |
| "logits/rejected": -2.5291526317596436, | |
| "logps/chosen": -2.313528537750244, | |
| "logps/rejected": -2.7951102256774902, | |
| "loss": 1.0006, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": -0.3000316917896271, | |
| "rewards/margins": 0.158560112118721, | |
| "rewards/rejected": -0.4585917890071869, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.3314435237679895, | |
| "grad_norm": 47.45322605710491, | |
| "learning_rate": 4.23237483295854e-07, | |
| "logits/chosen": -2.573887586593628, | |
| "logits/rejected": -2.7052321434020996, | |
| "logps/chosen": -2.364560127258301, | |
| "logps/rejected": -2.8203182220458984, | |
| "loss": 1.0715, | |
| "rewards/accuracies": 0.793749988079071, | |
| "rewards/chosen": -0.3148408532142639, | |
| "rewards/margins": 0.16490553319454193, | |
| "rewards/rejected": -0.47974634170532227, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.34888791975577843, | |
| "grad_norm": 36.54817057924313, | |
| "learning_rate": 4.119267176576475e-07, | |
| "logits/chosen": -2.688988208770752, | |
| "logits/rejected": -2.7872579097747803, | |
| "logps/chosen": -2.3317620754241943, | |
| "logps/rejected": -2.948866844177246, | |
| "loss": 0.9802, | |
| "rewards/accuracies": 0.793749988079071, | |
| "rewards/chosen": -0.3193766474723816, | |
| "rewards/margins": 0.1907302290201187, | |
| "rewards/rejected": -0.5101069211959839, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.34888791975577843, | |
| "eval_logits/chosen": -2.7230396270751953, | |
| "eval_logits/rejected": -2.7872889041900635, | |
| "eval_logps/chosen": -2.4268717765808105, | |
| "eval_logps/rejected": -2.992184638977051, | |
| "eval_loss": 0.9500909447669983, | |
| "eval_rewards/accuracies": 0.8012048006057739, | |
| "eval_rewards/chosen": -0.31839942932128906, | |
| "eval_rewards/margins": 0.2118416577577591, | |
| "eval_rewards/rejected": -0.530241072177887, | |
| "eval_runtime": 115.5249, | |
| "eval_samples_per_second": 22.878, | |
| "eval_steps_per_second": 0.718, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.3663323157435674, | |
| "grad_norm": 28.997542149982813, | |
| "learning_rate": 4.000135739943734e-07, | |
| "logits/chosen": -2.5338735580444336, | |
| "logits/rejected": -2.5965075492858887, | |
| "logps/chosen": -2.3301658630371094, | |
| "logps/rejected": -2.923480749130249, | |
| "loss": 1.017, | |
| "rewards/accuracies": 0.768750011920929, | |
| "rewards/chosen": -0.2939096689224243, | |
| "rewards/margins": 0.18346476554870605, | |
| "rewards/rejected": -0.477374404668808, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.3837767117313563, | |
| "grad_norm": 79.37314946467663, | |
| "learning_rate": 3.875423699829168e-07, | |
| "logits/chosen": -2.5550951957702637, | |
| "logits/rejected": -2.769721508026123, | |
| "logps/chosen": -2.377946615219116, | |
| "logps/rejected": -3.049376964569092, | |
| "loss": 0.9655, | |
| "rewards/accuracies": 0.824999988079071, | |
| "rewards/chosen": -0.33743661642074585, | |
| "rewards/margins": 0.20777985453605652, | |
| "rewards/rejected": -0.5452165007591248, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.4012211077191452, | |
| "grad_norm": 38.56972950284146, | |
| "learning_rate": 3.7455949932131175e-07, | |
| "logits/chosen": -2.853245258331299, | |
| "logits/rejected": -2.7923905849456787, | |
| "logps/chosen": -2.5535457134246826, | |
| "logps/rejected": -3.186497688293457, | |
| "loss": 1.0501, | |
| "rewards/accuracies": 0.7562500238418579, | |
| "rewards/chosen": -0.37545865774154663, | |
| "rewards/margins": 0.21732258796691895, | |
| "rewards/rejected": -0.5927812457084656, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.4186655037069342, | |
| "grad_norm": 34.32127443417028, | |
| "learning_rate": 3.611132591411369e-07, | |
| "logits/chosen": -2.845720052719116, | |
| "logits/rejected": -2.912646770477295, | |
| "logps/chosen": -2.471998691558838, | |
| "logps/rejected": -2.9336795806884766, | |
| "loss": 0.9871, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": -0.3210769295692444, | |
| "rewards/margins": 0.18005797266960144, | |
| "rewards/rejected": -0.5011348724365234, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.4361098996947231, | |
| "grad_norm": 36.8304920546943, | |
| "learning_rate": 3.4725367033901473e-07, | |
| "logits/chosen": -2.7427520751953125, | |
| "logits/rejected": -2.9484190940856934, | |
| "logps/chosen": -2.521329879760742, | |
| "logps/rejected": -3.1583688259124756, | |
| "loss": 1.018, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": -0.3658570647239685, | |
| "rewards/margins": 0.20647919178009033, | |
| "rewards/rejected": -0.5723362565040588, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.453554295682512, | |
| "grad_norm": 29.383883913798257, | |
| "learning_rate": 3.3303229149558967e-07, | |
| "logits/chosen": -2.8211348056793213, | |
| "logits/rejected": -2.8875656127929688, | |
| "logps/chosen": -2.307373523712158, | |
| "logps/rejected": -2.855837821960449, | |
| "loss": 0.9869, | |
| "rewards/accuracies": 0.8125, | |
| "rewards/chosen": -0.29735279083251953, | |
| "rewards/margins": 0.1880948543548584, | |
| "rewards/rejected": -0.48544764518737793, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.4709986916703009, | |
| "grad_norm": 37.77204908034358, | |
| "learning_rate": 3.185020270742225e-07, | |
| "logits/chosen": -2.7716403007507324, | |
| "logits/rejected": -2.8079323768615723, | |
| "logps/chosen": -2.386676549911499, | |
| "logps/rejected": -2.7951130867004395, | |
| "loss": 0.9871, | |
| "rewards/accuracies": 0.71875, | |
| "rewards/chosen": -0.32528942823410034, | |
| "rewards/margins": 0.15507872402668, | |
| "rewards/rejected": -0.48036813735961914, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.48844308765808986, | |
| "grad_norm": 28.328010250614053, | |
| "learning_rate": 3.0371693061291146e-07, | |
| "logits/chosen": -2.780062437057495, | |
| "logits/rejected": -2.9711029529571533, | |
| "logps/chosen": -2.551729679107666, | |
| "logps/rejected": -3.2051796913146973, | |
| "loss": 1.0056, | |
| "rewards/accuracies": 0.768750011920929, | |
| "rewards/chosen": -0.40202435851097107, | |
| "rewards/margins": 0.1922958493232727, | |
| "rewards/rejected": -0.5943201780319214, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.5058874836458788, | |
| "grad_norm": 32.14871116699808, | |
| "learning_rate": 2.8873200364158106e-07, | |
| "logits/chosen": -2.680164337158203, | |
| "logits/rejected": -2.803938388824463, | |
| "logps/chosen": -2.43742036819458, | |
| "logps/rejected": -2.9839630126953125, | |
| "loss": 0.9837, | |
| "rewards/accuracies": 0.831250011920929, | |
| "rewards/chosen": -0.32910409569740295, | |
| "rewards/margins": 0.2097328007221222, | |
| "rewards/rejected": -0.5388368368148804, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.5233318796336677, | |
| "grad_norm": 40.52094893481022, | |
| "learning_rate": 2.7360299107277767e-07, | |
| "logits/chosen": -2.7708568572998047, | |
| "logits/rejected": -2.821788787841797, | |
| "logps/chosen": -2.4744372367858887, | |
| "logps/rejected": -3.086942672729492, | |
| "loss": 0.9548, | |
| "rewards/accuracies": 0.793749988079071, | |
| "rewards/chosen": -0.3568132221698761, | |
| "rewards/margins": 0.20053787529468536, | |
| "rewards/rejected": -0.5573510527610779, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.5233318796336677, | |
| "eval_logits/chosen": -2.808656692504883, | |
| "eval_logits/rejected": -2.8788304328918457, | |
| "eval_logps/chosen": -2.571028709411621, | |
| "eval_logps/rejected": -3.17358660697937, | |
| "eval_loss": 0.9136135578155518, | |
| "eval_rewards/accuracies": 0.8162650465965271, | |
| "eval_rewards/chosen": -0.3760621249675751, | |
| "eval_rewards/margins": 0.22673983871936798, | |
| "eval_rewards/rejected": -0.6028019785881042, | |
| "eval_runtime": 115.45, | |
| "eval_samples_per_second": 22.893, | |
| "eval_steps_per_second": 0.719, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.5407762756214566, | |
| "grad_norm": 39.89301839500131, | |
| "learning_rate": 2.5838617382693414e-07, | |
| "logits/chosen": -2.948072671890259, | |
| "logits/rejected": -2.9336767196655273, | |
| "logps/chosen": -2.5350139141082764, | |
| "logps/rejected": -3.0460660457611084, | |
| "loss": 1.0184, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": -0.37652358412742615, | |
| "rewards/margins": 0.18868541717529297, | |
| "rewards/rejected": -0.5652090311050415, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.5582206716092455, | |
| "grad_norm": 36.83353696515594, | |
| "learning_rate": 2.4313815946364877e-07, | |
| "logits/chosen": -2.8387434482574463, | |
| "logits/rejected": -2.978883743286133, | |
| "logps/chosen": -2.4741597175598145, | |
| "logps/rejected": -3.0683841705322266, | |
| "loss": 0.9653, | |
| "rewards/accuracies": 0.7437499761581421, | |
| "rewards/chosen": -0.3760759234428406, | |
| "rewards/margins": 0.1771194189786911, | |
| "rewards/rejected": -0.5531953573226929, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.5756650675970345, | |
| "grad_norm": 30.76657514726095, | |
| "learning_rate": 2.2791567159784566e-07, | |
| "logits/chosen": -2.762192487716675, | |
| "logits/rejected": -2.7734150886535645, | |
| "logps/chosen": -2.4779891967773438, | |
| "logps/rejected": -2.9213876724243164, | |
| "loss": 0.9814, | |
| "rewards/accuracies": 0.7875000238418579, | |
| "rewards/chosen": -0.3409067988395691, | |
| "rewards/margins": 0.18150492012500763, | |
| "rewards/rejected": -0.5224117040634155, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.5931094635848234, | |
| "grad_norm": 32.87393386922799, | |
| "learning_rate": 2.12775338884202e-07, | |
| "logits/chosen": -2.886357307434082, | |
| "logits/rejected": -2.9018540382385254, | |
| "logps/chosen": -2.4813778400421143, | |
| "logps/rejected": -2.9721169471740723, | |
| "loss": 0.9893, | |
| "rewards/accuracies": 0.793749988079071, | |
| "rewards/chosen": -0.3222178816795349, | |
| "rewards/margins": 0.20408673584461212, | |
| "rewards/rejected": -0.5263046622276306, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.6105538595726123, | |
| "grad_norm": 30.494923835102345, | |
| "learning_rate": 1.977734843548328e-07, | |
| "logits/chosen": -2.824707508087158, | |
| "logits/rejected": -2.79854154586792, | |
| "logps/chosen": -2.3759117126464844, | |
| "logps/rejected": -2.975604295730591, | |
| "loss": 0.9863, | |
| "rewards/accuracies": 0.824999988079071, | |
| "rewards/chosen": -0.3144580125808716, | |
| "rewards/margins": 0.21930566430091858, | |
| "rewards/rejected": -0.5337637066841125, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.6279982555604012, | |
| "grad_norm": 26.8742332382294, | |
| "learning_rate": 1.8296591589391226e-07, | |
| "logits/chosen": -2.6628756523132324, | |
| "logits/rejected": -2.7284369468688965, | |
| "logps/chosen": -2.4052791595458984, | |
| "logps/rejected": -2.870692253112793, | |
| "loss": 0.9646, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -0.310857355594635, | |
| "rewards/margins": 0.15339362621307373, | |
| "rewards/rejected": -0.46425098180770874, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.6454426515481901, | |
| "grad_norm": 35.52686655329713, | |
| "learning_rate": 1.684077186286792e-07, | |
| "logits/chosen": -2.50087308883667, | |
| "logits/rejected": -2.6514532566070557, | |
| "logps/chosen": -2.2871201038360596, | |
| "logps/rejected": -2.937791585922241, | |
| "loss": 0.9877, | |
| "rewards/accuracies": 0.7875000238418579, | |
| "rewards/chosen": -0.3152330219745636, | |
| "rewards/margins": 0.2090420424938202, | |
| "rewards/rejected": -0.5242750644683838, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.662887047535979, | |
| "grad_norm": 30.399222288686712, | |
| "learning_rate": 1.5415305000914585e-07, | |
| "logits/chosen": -2.7632899284362793, | |
| "logits/rejected": -2.886943817138672, | |
| "logps/chosen": -2.4425692558288574, | |
| "logps/rejected": -3.0246245861053467, | |
| "loss": 0.9226, | |
| "rewards/accuracies": 0.793749988079071, | |
| "rewards/chosen": -0.33751311898231506, | |
| "rewards/margins": 0.1871272623538971, | |
| "rewards/rejected": -0.5246403813362122, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.680331443523768, | |
| "grad_norm": 33.76165947663638, | |
| "learning_rate": 1.4025493833882643e-07, | |
| "logits/chosen": -2.622499465942383, | |
| "logits/rejected": -2.6712939739227295, | |
| "logps/chosen": -2.521423816680908, | |
| "logps/rejected": -3.01672625541687, | |
| "loss": 0.9625, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -0.39204803109169006, | |
| "rewards/margins": 0.18184103071689606, | |
| "rewards/rejected": -0.5738890767097473, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.6977758395115569, | |
| "grad_norm": 39.46957147631396, | |
| "learning_rate": 1.267650855059656e-07, | |
| "logits/chosen": -2.7575411796569824, | |
| "logits/rejected": -2.8080406188964844, | |
| "logps/chosen": -2.511035203933716, | |
| "logps/rejected": -2.9791462421417236, | |
| "loss": 0.9834, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": -0.3435077667236328, | |
| "rewards/margins": 0.1896781176328659, | |
| "rewards/rejected": -0.5331858396530151, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.6977758395115569, | |
| "eval_logits/chosen": -2.8667454719543457, | |
| "eval_logits/rejected": -2.937051773071289, | |
| "eval_logps/chosen": -2.4769790172576904, | |
| "eval_logps/rejected": -3.0509371757507324, | |
| "eval_loss": 0.9041184186935425, | |
| "eval_rewards/accuracies": 0.8042168617248535, | |
| "eval_rewards/chosen": -0.3384423553943634, | |
| "eval_rewards/margins": 0.21529993414878845, | |
| "eval_rewards/rejected": -0.5537422895431519, | |
| "eval_runtime": 115.5756, | |
| "eval_samples_per_second": 22.868, | |
| "eval_steps_per_second": 0.718, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.7152202354993459, | |
| "grad_norm": 35.58611489394968, | |
| "learning_rate": 1.1373367464911796e-07, | |
| "logits/chosen": -2.7775580883026123, | |
| "logits/rejected": -2.8066816329956055, | |
| "logps/chosen": -2.44441294670105, | |
| "logps/rejected": -2.940340042114258, | |
| "loss": 0.966, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/chosen": -0.3467921018600464, | |
| "rewards/margins": 0.17160984873771667, | |
| "rewards/rejected": -0.5184019804000854, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.7326646314871348, | |
| "grad_norm": 31.573311664278364, | |
| "learning_rate": 1.0120918347257668e-07, | |
| "logits/chosen": -2.912554979324341, | |
| "logits/rejected": -2.9200565814971924, | |
| "logps/chosen": -2.5111441612243652, | |
| "logps/rejected": -3.0291829109191895, | |
| "loss": 1.0018, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/chosen": -0.3495512306690216, | |
| "rewards/margins": 0.18085847795009613, | |
| "rewards/rejected": -0.5304096937179565, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.7501090274749237, | |
| "grad_norm": 38.092625083892315, | |
| "learning_rate": 8.923820390612991e-08, | |
| "logits/chosen": -2.8513288497924805, | |
| "logits/rejected": -2.847677707672119, | |
| "logps/chosen": -2.44001841545105, | |
| "logps/rejected": -3.063814640045166, | |
| "loss": 0.9975, | |
| "rewards/accuracies": 0.84375, | |
| "rewards/chosen": -0.3544333577156067, | |
| "rewards/margins": 0.22024652361869812, | |
| "rewards/rejected": -0.5746799111366272, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.7675534234627126, | |
| "grad_norm": 30.479406895571362, | |
| "learning_rate": 7.786526878002125e-08, | |
| "logits/chosen": -2.714656114578247, | |
| "logits/rejected": -2.838855743408203, | |
| "logps/chosen": -2.3737549781799316, | |
| "logps/rejected": -3.0006814002990723, | |
| "loss": 0.9624, | |
| "rewards/accuracies": 0.793749988079071, | |
| "rewards/chosen": -0.32796674966812134, | |
| "rewards/margins": 0.20443764328956604, | |
| "rewards/rejected": -0.532404363155365, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.7849978194505015, | |
| "grad_norm": 33.130716501269866, | |
| "learning_rate": 6.713268615989654e-08, | |
| "logits/chosen": -2.9494526386260986, | |
| "logits/rejected": -2.9998550415039062, | |
| "logps/chosen": -2.509775161743164, | |
| "logps/rejected": -3.015613555908203, | |
| "loss": 0.9668, | |
| "rewards/accuracies": 0.762499988079071, | |
| "rewards/chosen": -0.36944177746772766, | |
| "rewards/margins": 0.1895218938589096, | |
| "rewards/rejected": -0.5589635968208313, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.8024422154382904, | |
| "grad_norm": 33.25889406980063, | |
| "learning_rate": 5.7080381958020976e-08, | |
| "logits/chosen": -2.674802303314209, | |
| "logits/rejected": -2.8434243202209473, | |
| "logps/chosen": -2.5848803520202637, | |
| "logps/rejected": -3.1880083084106445, | |
| "loss": 0.9423, | |
| "rewards/accuracies": 0.862500011920929, | |
| "rewards/chosen": -0.376645028591156, | |
| "rewards/margins": 0.20410259068012238, | |
| "rewards/rejected": -0.5807476043701172, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.8198866114260793, | |
| "grad_norm": 30.106792425198947, | |
| "learning_rate": 4.774575140626316e-08, | |
| "logits/chosen": -2.7849488258361816, | |
| "logits/rejected": -2.903566360473633, | |
| "logps/chosen": -2.3999593257904053, | |
| "logps/rejected": -3.0301947593688965, | |
| "loss": 0.9476, | |
| "rewards/accuracies": 0.8062499761581421, | |
| "rewards/chosen": -0.3394695520401001, | |
| "rewards/margins": 0.21237297356128693, | |
| "rewards/rejected": -0.5518425703048706, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.8373310074138683, | |
| "grad_norm": 37.88465481105631, | |
| "learning_rate": 3.9163519943375965e-08, | |
| "logits/chosen": -2.8227665424346924, | |
| "logits/rejected": -2.9443516731262207, | |
| "logps/chosen": -2.4296436309814453, | |
| "logps/rejected": -2.984886646270752, | |
| "loss": 0.8745, | |
| "rewards/accuracies": 0.768750011920929, | |
| "rewards/chosen": -0.35570722818374634, | |
| "rewards/margins": 0.16281530261039734, | |
| "rewards/rejected": -0.5185225605964661, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.8547754034016573, | |
| "grad_norm": 30.701274278534907, | |
| "learning_rate": 3.136561403408422e-08, | |
| "logits/chosen": -2.8463587760925293, | |
| "logits/rejected": -3.0236496925354004, | |
| "logps/chosen": -2.5174646377563477, | |
| "logps/rejected": -3.2128849029541016, | |
| "loss": 0.9445, | |
| "rewards/accuracies": 0.8062499761581421, | |
| "rewards/chosen": -0.37872380018234253, | |
| "rewards/margins": 0.22148160636425018, | |
| "rewards/rejected": -0.6002054810523987, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.8722197993894462, | |
| "grad_norm": 32.269326624165465, | |
| "learning_rate": 2.4381042400538593e-08, | |
| "logits/chosen": -2.871164560317993, | |
| "logits/rejected": -3.0147228240966797, | |
| "logps/chosen": -2.5578746795654297, | |
| "logps/rejected": -3.273369312286377, | |
| "loss": 0.9967, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/chosen": -0.4019395709037781, | |
| "rewards/margins": 0.21136029064655304, | |
| "rewards/rejected": -0.6132999062538147, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.8722197993894462, | |
| "eval_logits/chosen": -2.959242105484009, | |
| "eval_logits/rejected": -3.029346227645874, | |
| "eval_logps/chosen": -2.568408966064453, | |
| "eval_logps/rejected": -3.185546398162842, | |
| "eval_loss": 0.8938218951225281, | |
| "eval_rewards/accuracies": 0.7891566157341003, | |
| "eval_rewards/chosen": -0.37501412630081177, | |
| "eval_rewards/margins": 0.23257188498973846, | |
| "eval_rewards/rejected": -0.6075860857963562, | |
| "eval_runtime": 115.5545, | |
| "eval_samples_per_second": 22.872, | |
| "eval_steps_per_second": 0.718, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.8896641953772351, | |
| "grad_norm": 35.20455083141279, | |
| "learning_rate": 1.8235788107963945e-08, | |
| "logits/chosen": -2.887869358062744, | |
| "logits/rejected": -2.820462703704834, | |
| "logps/chosen": -2.587712526321411, | |
| "logps/rejected": -3.1114964485168457, | |
| "loss": 0.9135, | |
| "rewards/accuracies": 0.762499988079071, | |
| "rewards/chosen": -0.37796396017074585, | |
| "rewards/margins": 0.18830379843711853, | |
| "rewards/rejected": -0.5662677884101868, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.907108591365024, | |
| "grad_norm": 29.121513812900695, | |
| "learning_rate": 1.2952711905950376e-08, | |
| "logits/chosen": -2.8955085277557373, | |
| "logits/rejected": -2.908362627029419, | |
| "logps/chosen": -2.5724124908447266, | |
| "logps/rejected": -3.0688400268554688, | |
| "loss": 0.9963, | |
| "rewards/accuracies": 0.731249988079071, | |
| "rewards/chosen": -0.39740028977394104, | |
| "rewards/margins": 0.17423763871192932, | |
| "rewards/rejected": -0.5716378688812256, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.9245529873528129, | |
| "grad_norm": 30.97739103991955, | |
| "learning_rate": 8.55146718496283e-09, | |
| "logits/chosen": -2.919581890106201, | |
| "logits/rejected": -2.8886361122131348, | |
| "logps/chosen": -2.599856376647949, | |
| "logps/rejected": -2.9770760536193848, | |
| "loss": 0.9551, | |
| "rewards/accuracies": 0.768750011920929, | |
| "rewards/chosen": -0.3779660165309906, | |
| "rewards/margins": 0.17554500699043274, | |
| "rewards/rejected": -0.5535110235214233, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.9419973833406018, | |
| "grad_norm": 44.940688671381224, | |
| "learning_rate": 5.048426864438182e-09, | |
| "logits/chosen": -2.870760440826416, | |
| "logits/rejected": -2.946399688720703, | |
| "logps/chosen": -2.518998146057129, | |
| "logps/rejected": -3.1112277507781982, | |
| "loss": 0.9636, | |
| "rewards/accuracies": 0.737500011920929, | |
| "rewards/chosen": -0.3970429301261902, | |
| "rewards/margins": 0.18053510785102844, | |
| "rewards/rejected": -0.577578067779541, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.9594417793283908, | |
| "grad_norm": 33.602758621496534, | |
| "learning_rate": 2.456622484449139e-09, | |
| "logits/chosen": -2.9532032012939453, | |
| "logits/rejected": -3.0146090984344482, | |
| "logps/chosen": -2.5181241035461426, | |
| "logps/rejected": -3.0724215507507324, | |
| "loss": 0.9918, | |
| "rewards/accuracies": 0.8187500238418579, | |
| "rewards/chosen": -0.35534483194351196, | |
| "rewards/margins": 0.210611030459404, | |
| "rewards/rejected": -0.5659558176994324, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.9768861753161797, | |
| "grad_norm": 40.276290803183436, | |
| "learning_rate": 7.85695727519492e-10, | |
| "logits/chosen": -2.8332512378692627, | |
| "logits/rejected": -2.8909554481506348, | |
| "logps/chosen": -2.6798324584960938, | |
| "logps/rejected": -3.1653189659118652, | |
| "loss": 0.957, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": -0.3789713382720947, | |
| "rewards/margins": 0.16679120063781738, | |
| "rewards/rejected": -0.5457625389099121, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.9943305713039686, | |
| "grad_norm": 44.66764267907785, | |
| "learning_rate": 4.1862550931942575e-11, | |
| "logits/chosen": -2.9107937812805176, | |
| "logits/rejected": -2.905510425567627, | |
| "logps/chosen": -2.4948418140411377, | |
| "logps/rejected": -2.9094674587249756, | |
| "loss": 0.965, | |
| "rewards/accuracies": 0.7875000238418579, | |
| "rewards/chosen": -0.3739563822746277, | |
| "rewards/margins": 0.17874310910701752, | |
| "rewards/rejected": -0.5526994466781616, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.9995638901003053, | |
| "step": 573, | |
| "total_flos": 0.0, | |
| "train_loss": 1.0578667073141634, | |
| "train_runtime": 8389.7514, | |
| "train_samples_per_second": 8.743, | |
| "train_steps_per_second": 0.068 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 573, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": false, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 4, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |