{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9996283909327388,
  "eval_steps": 500,
  "global_step": 1345,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0505388331475288,
      "grad_norm": 4.774968147277832,
      "learning_rate": 5.037037037037037e-07,
      "logits/chosen": -0.535763680934906,
      "logits/rejected": -0.45358335971832275,
      "logps/chosen": -72.03621673583984,
      "logps/rejected": -10.970436096191406,
      "loss": 0.6914,
      "rewards/accuracies": 0.5533088445663452,
      "rewards/chosen": 0.003953414969146252,
      "rewards/margins": 0.003940199967473745,
      "rewards/rejected": 1.321489253314212e-05,
      "step": 68
    },
    {
      "epoch": 0.1010776662950576,
      "grad_norm": 7.106871128082275,
      "learning_rate": 9.991735537190081e-07,
      "logits/chosen": -0.5598275661468506,
      "logits/rejected": -0.48031094670295715,
      "logps/chosen": -67.91443634033203,
      "logps/rejected": -10.759711265563965,
      "loss": 0.6763,
      "rewards/accuracies": 0.7647058963775635,
      "rewards/chosen": 0.030388537794351578,
      "rewards/margins": 0.03460656851530075,
      "rewards/rejected": -0.004218032583594322,
      "step": 136
    },
    {
      "epoch": 0.1516164994425864,
      "grad_norm": 6.630987167358398,
      "learning_rate": 9.429752066115701e-07,
      "logits/chosen": -0.5454370379447937,
      "logits/rejected": -0.4578668177127838,
      "logps/chosen": -65.27257537841797,
      "logps/rejected": -11.141642570495605,
      "loss": 0.5834,
      "rewards/accuracies": 0.966911792755127,
      "rewards/chosen": 0.20791733264923096,
      "rewards/margins": 0.25071945786476135,
      "rewards/rejected": -0.04280214384198189,
      "step": 204
    },
    {
      "epoch": 0.2021553325901152,
      "grad_norm": 2.813685417175293,
      "learning_rate": 8.867768595041321e-07,
      "logits/chosen": -0.5535087585449219,
      "logits/rejected": -0.48542195558547974,
      "logps/chosen": -59.47464370727539,
      "logps/rejected": -12.498270988464355,
      "loss": 0.4096,
      "rewards/accuracies": 0.9761029481887817,
      "rewards/chosen": 0.5964955687522888,
      "rewards/margins": 0.8128367066383362,
      "rewards/rejected": -0.21634113788604736,
      "step": 272
    },
    {
      "epoch": 0.25269416573764397,
      "grad_norm": 1.8204927444458008,
      "learning_rate": 8.305785123966941e-07,
      "logits/chosen": -0.5383437871932983,
      "logits/rejected": -0.4763253629207611,
      "logps/chosen": -59.065216064453125,
      "logps/rejected": -15.607768058776855,
      "loss": 0.2626,
      "rewards/accuracies": 0.9816176295280457,
      "rewards/chosen": 1.058206558227539,
      "rewards/margins": 1.5873197317123413,
      "rewards/rejected": -0.529113233089447,
      "step": 340
    },
    {
      "epoch": 0.3032329988851728,
      "grad_norm": 2.656168222427368,
      "learning_rate": 7.743801652892562e-07,
      "logits/chosen": -0.5623682737350464,
      "logits/rejected": -0.5051128268241882,
      "logps/chosen": -57.070865631103516,
      "logps/rejected": -18.777294158935547,
      "loss": 0.1687,
      "rewards/accuracies": 0.9871323704719543,
      "rewards/chosen": 1.3186638355255127,
      "rewards/margins": 2.2629363536834717,
      "rewards/rejected": -0.9442727565765381,
      "step": 408
    },
    {
      "epoch": 0.35377183203270157,
      "grad_norm": 1.3962138891220093,
      "learning_rate": 7.181818181818181e-07,
      "logits/chosen": -0.5668503046035767,
      "logits/rejected": -0.5197104215621948,
      "logps/chosen": -58.299644470214844,
      "logps/rejected": -24.42568016052246,
      "loss": 0.1139,
      "rewards/accuracies": 0.9889705777168274,
      "rewards/chosen": 1.4811238050460815,
      "rewards/margins": 2.8274738788604736,
      "rewards/rejected": -1.346349835395813,
      "step": 476
    },
    {
      "epoch": 0.4043106651802304,
      "grad_norm": 4.664539337158203,
      "learning_rate": 6.619834710743801e-07,
      "logits/chosen": -0.5390637516975403,
      "logits/rejected": -0.5045632719993591,
      "logps/chosen": -49.41936492919922,
      "logps/rejected": -27.90851593017578,
      "loss": 0.077,
      "rewards/accuracies": 0.9889705777168274,
      "rewards/chosen": 1.4117194414138794,
      "rewards/margins": 3.2265889644622803,
      "rewards/rejected": -1.8148694038391113,
      "step": 544
    },
    {
      "epoch": 0.45484949832775917,
      "grad_norm": 0.970925509929657,
      "learning_rate": 6.057851239669421e-07,
      "logits/chosen": -0.5126790404319763,
      "logits/rejected": -0.46291089057922363,
      "logps/chosen": -58.05442810058594,
      "logps/rejected": -35.03934097290039,
      "loss": 0.0496,
      "rewards/accuracies": 0.9908088445663452,
      "rewards/chosen": 1.6365827322006226,
      "rewards/margins": 3.8830738067626953,
      "rewards/rejected": -2.246490955352783,
      "step": 612
    },
    {
      "epoch": 0.5053883314752879,
      "grad_norm": 1.9165253639221191,
      "learning_rate": 5.49586776859504e-07,
      "logits/chosen": -0.5267462730407715,
      "logits/rejected": -0.45349758863449097,
      "logps/chosen": -53.660301208496094,
      "logps/rejected": -34.66725158691406,
      "loss": 0.0412,
      "rewards/accuracies": 0.9834558963775635,
      "rewards/chosen": 1.7377012968063354,
      "rewards/margins": 4.283178329467773,
      "rewards/rejected": -2.5454771518707275,
      "step": 680
    },
    {
      "epoch": 0.5559271646228168,
      "grad_norm": 0.5560820698738098,
      "learning_rate": 4.933884297520661e-07,
      "logits/chosen": -0.5102059841156006,
      "logits/rejected": -0.44081857800483704,
      "logps/chosen": -48.95499801635742,
      "logps/rejected": -38.01549530029297,
      "loss": 0.0338,
      "rewards/accuracies": 0.9908088445663452,
      "rewards/chosen": 1.5988441705703735,
      "rewards/margins": 4.466281890869141,
      "rewards/rejected": -2.8674376010894775,
      "step": 748
    },
    {
      "epoch": 0.6064659977703456,
      "grad_norm": 0.654052734375,
      "learning_rate": 4.3719008264462806e-07,
      "logits/chosen": -0.5107941627502441,
      "logits/rejected": -0.424625426530838,
      "logps/chosen": -52.267120361328125,
      "logps/rejected": -39.51735305786133,
      "loss": 0.0403,
      "rewards/accuracies": 0.9797794222831726,
      "rewards/chosen": 1.5989586114883423,
      "rewards/margins": 4.681400775909424,
      "rewards/rejected": -3.0824427604675293,
      "step": 816
    },
    {
      "epoch": 0.6570048309178744,
      "grad_norm": 0.2799667716026306,
      "learning_rate": 3.8099173553719006e-07,
      "logits/chosen": -0.48151201009750366,
      "logits/rejected": -0.3872612416744232,
      "logps/chosen": -51.13566970825195,
      "logps/rejected": -41.436946868896484,
      "loss": 0.0386,
      "rewards/accuracies": 0.9852941036224365,
      "rewards/chosen": 1.6831274032592773,
      "rewards/margins": 4.950973033905029,
      "rewards/rejected": -3.26784610748291,
      "step": 884
    },
    {
      "epoch": 0.7075436640654031,
      "grad_norm": 3.401352882385254,
      "learning_rate": 3.2479338842975206e-07,
      "logits/chosen": -0.4963739216327667,
      "logits/rejected": -0.397490918636322,
      "logps/chosen": -51.522117614746094,
      "logps/rejected": -42.51453399658203,
      "loss": 0.0242,
      "rewards/accuracies": 0.9908088445663452,
      "rewards/chosen": 1.671505331993103,
      "rewards/margins": 4.983243465423584,
      "rewards/rejected": -3.3117384910583496,
      "step": 952
    },
    {
      "epoch": 0.758082497212932,
      "grad_norm": 0.16179317235946655,
      "learning_rate": 2.6859504132231406e-07,
      "logits/chosen": -0.4788703918457031,
      "logits/rejected": -0.3747369050979614,
      "logps/chosen": -54.24635314941406,
      "logps/rejected": -42.791847229003906,
      "loss": 0.037,
      "rewards/accuracies": 0.9852941036224365,
      "rewards/chosen": 1.7351138591766357,
      "rewards/margins": 5.101663112640381,
      "rewards/rejected": -3.366548776626587,
      "step": 1020
    },
    {
      "epoch": 0.8086213303604608,
      "grad_norm": 0.2579549252986908,
      "learning_rate": 2.1239669421487603e-07,
      "logits/chosen": -0.48607107996940613,
      "logits/rejected": -0.3732473850250244,
      "logps/chosen": -53.21406555175781,
      "logps/rejected": -45.13155746459961,
      "loss": 0.0328,
      "rewards/accuracies": 0.9852941036224365,
      "rewards/chosen": 1.671941876411438,
      "rewards/margins": 5.195909023284912,
      "rewards/rejected": -3.5239670276641846,
      "step": 1088
    },
    {
      "epoch": 0.8591601635079896,
      "grad_norm": 0.4580838680267334,
      "learning_rate": 1.56198347107438e-07,
      "logits/chosen": -0.4816429018974304,
      "logits/rejected": -0.36846473813056946,
      "logps/chosen": -53.959617614746094,
      "logps/rejected": -46.597286224365234,
      "loss": 0.0252,
      "rewards/accuracies": 0.9889705777168274,
      "rewards/chosen": 1.7494579553604126,
      "rewards/margins": 5.300571441650391,
      "rewards/rejected": -3.5511131286621094,
      "step": 1156
    },
    {
      "epoch": 0.9096989966555183,
      "grad_norm": 0.2617437243461609,
      "learning_rate": 1e-07,
      "logits/chosen": -0.48045316338539124,
      "logits/rejected": -0.3751773536205292,
      "logps/chosen": -53.45015335083008,
      "logps/rejected": -45.071617126464844,
      "loss": 0.0358,
      "rewards/accuracies": 0.9834558963775635,
      "rewards/chosen": 1.7436227798461914,
      "rewards/margins": 5.314986705780029,
      "rewards/rejected": -3.571363925933838,
      "step": 1224
    },
    {
      "epoch": 0.9602378298030472,
      "grad_norm": 0.671177864074707,
      "learning_rate": 4.3801652892561986e-08,
      "logits/chosen": -0.47449061274528503,
      "logits/rejected": -0.35607320070266724,
      "logps/chosen": -55.56103515625,
      "logps/rejected": -45.930023193359375,
      "loss": 0.0315,
      "rewards/accuracies": 0.9834558963775635,
      "rewards/chosen": 1.7040444612503052,
      "rewards/margins": 5.217987060546875,
      "rewards/rejected": -3.5139424800872803,
      "step": 1292
    },
    {
      "epoch": 0.9996283909327388,
      "step": 1345,
      "total_flos": 7.57814156543656e+17,
      "train_loss": 0.17186723207452484,
      "train_runtime": 19211.7112,
      "train_samples_per_second": 0.56,
      "train_steps_per_second": 0.07
    }
  ],
  "logging_steps": 68,
  "max_steps": 1345,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 7.57814156543656e+17,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}