{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.060836501901140684, "eval_steps": 50, "global_step": 20, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.003041825095057034, "grad_norm": 0.2263541966676712, "learning_rate": 9.090909090909091e-06, "log_odds_chosen": 0.0022868067026138306, "log_odds_ratio": -0.6926867961883545, "logits/chosen": -2.233539581298828, "logits/rejected": -2.2098608016967773, "logps/chosen": -0.7675836682319641, "logps/rejected": -0.7697547078132629, "loss": 0.85230005, "memory(GiB)": 243.33, "nll_loss": 0.7675836682319641, "rewards/accuracies": 0.375, "rewards/chosen": -0.07675836980342865, "rewards/margins": 0.00021709827706217766, "rewards/rejected": -0.07697547227144241, "step": 1, "train_speed(iter/s)": 0.010532 }, { "epoch": 0.006083650190114068, "grad_norm": 0.21064028143882751, "learning_rate": 1.8181818181818182e-05, "log_odds_chosen": 0.411478191614151, "log_odds_ratio": -0.5463609099388123, "logits/chosen": -2.2685160636901855, "logits/rejected": -2.260843515396118, "logps/chosen": -0.9355340003967285, "logps/rejected": -1.236234188079834, "loss": 0.87318134, "memory(GiB)": 243.33, "nll_loss": 0.9355340003967285, "rewards/accuracies": 0.875, "rewards/chosen": -0.09355340152978897, "rewards/margins": 0.030070019885897636, "rewards/rejected": -0.12362341582775116, "step": 2, "train_speed(iter/s)": 0.009734 }, { "epoch": 0.009125475285171103, "grad_norm": 0.19250068068504333, "learning_rate": 2.727272727272727e-05, "log_odds_chosen": 0.3938485383987427, "log_odds_ratio": -0.5777455568313599, "logits/chosen": -1.993510365486145, "logits/rejected": -1.9485821723937988, "logps/chosen": -0.6979267001152039, "logps/rejected": -0.9728450775146484, "loss": 0.75437951, "memory(GiB)": 243.33, "nll_loss": 0.6979267597198486, "rewards/accuracies": 0.375, "rewards/chosen": -0.06979266554117203, "rewards/margins": 0.027491841465234756, "rewards/rejected": -0.09728450328111649, "step": 3, "train_speed(iter/s)": 0.01152 }, { "epoch": 0.012167300380228136, "grad_norm": 0.20081621408462524, "learning_rate": 3.6363636363636364e-05, "log_odds_chosen": 0.0849333107471466, "log_odds_ratio": -0.6586424112319946, "logits/chosen": -2.3017446994781494, "logits/rejected": -2.300307512283325, "logps/chosen": -0.7535954713821411, "logps/rejected": -0.8145345449447632, "loss": 0.85012603, "memory(GiB)": 315.51, "nll_loss": 0.7535954713821411, "rewards/accuracies": 0.5, "rewards/chosen": -0.0753595381975174, "rewards/margins": 0.006093906704336405, "rewards/rejected": -0.08145345002412796, "step": 4, "train_speed(iter/s)": 0.011285 }, { "epoch": 0.015209125475285171, "grad_norm": 0.20968054234981537, "learning_rate": 4.545454545454545e-05, "log_odds_chosen": 0.2265796959400177, "log_odds_ratio": -0.605457067489624, "logits/chosen": -2.132286548614502, "logits/rejected": -2.0954604148864746, "logps/chosen": -0.7475967407226562, "logps/rejected": -0.9037913680076599, "loss": 0.83076566, "memory(GiB)": 349.51, "nll_loss": 0.747596800327301, "rewards/accuracies": 0.75, "rewards/chosen": -0.07475967705249786, "rewards/margins": 0.015619462355971336, "rewards/rejected": -0.09037913382053375, "step": 5, "train_speed(iter/s)": 0.01019 }, { "epoch": 0.018250950570342206, "grad_norm": 0.28389012813568115, "learning_rate": 5.454545454545454e-05, "log_odds_chosen": 0.07245179265737534, "log_odds_ratio": -0.6642654538154602, "logits/chosen": -2.207275867462158, "logits/rejected": -2.194154977798462, "logps/chosen": -0.7644542455673218, "logps/rejected": -0.811606228351593, "loss": 0.80996728, "memory(GiB)": 304.24, "nll_loss": 0.7644542455673218, "rewards/accuracies": 0.625, "rewards/chosen": -0.07644543051719666, "rewards/margins": 0.004715194460004568, "rewards/rejected": -0.08116061985492706, "step": 6, "train_speed(iter/s)": 0.010519 }, { "epoch": 0.02129277566539924, "grad_norm": 0.2204827517271042, "learning_rate": 6.363636363636363e-05, "log_odds_chosen": 0.0014549940824508667, "log_odds_ratio": -0.6934571266174316, "logits/chosen": -2.2422585487365723, "logits/rejected": -2.2558586597442627, "logps/chosen": -0.7055240869522095, "logps/rejected": -0.7079155445098877, "loss": 0.80136919, "memory(GiB)": 376.14, "nll_loss": 0.7055240869522095, "rewards/accuracies": 0.5, "rewards/chosen": -0.07055240869522095, "rewards/margins": 0.0002391412854194641, "rewards/rejected": -0.07079154998064041, "step": 7, "train_speed(iter/s)": 0.010453 }, { "epoch": 0.024334600760456272, "grad_norm": 0.18603529036045074, "learning_rate": 7.272727272727273e-05, "log_odds_chosen": 0.2205890268087387, "log_odds_ratio": -0.6130101084709167, "logits/chosen": -2.1775014400482178, "logits/rejected": -2.1533212661743164, "logps/chosen": -0.6938110589981079, "logps/rejected": -0.8382289409637451, "loss": 0.73867655, "memory(GiB)": 337.96, "nll_loss": 0.6938109993934631, "rewards/accuracies": 0.875, "rewards/chosen": -0.06938110291957855, "rewards/margins": 0.01444179005920887, "rewards/rejected": -0.08382289111614227, "step": 8, "train_speed(iter/s)": 0.010026 }, { "epoch": 0.02737642585551331, "grad_norm": 0.2093774527311325, "learning_rate": 8.18181818181818e-05, "log_odds_chosen": 0.28880590200424194, "log_odds_ratio": -0.5970734357833862, "logits/chosen": -2.231719970703125, "logits/rejected": -2.1949002742767334, "logps/chosen": -0.6275652647018433, "logps/rejected": -0.8133708834648132, "loss": 0.73195064, "memory(GiB)": 337.96, "nll_loss": 0.627565324306488, "rewards/accuracies": 0.75, "rewards/chosen": -0.06275652348995209, "rewards/margins": 0.018580568954348564, "rewards/rejected": -0.0813370943069458, "step": 9, "train_speed(iter/s)": 0.010471 }, { "epoch": 0.030418250950570342, "grad_norm": 0.17292404174804688, "learning_rate": 9.09090909090909e-05, "log_odds_chosen": 0.1367553025484085, "log_odds_ratio": -0.6314333081245422, "logits/chosen": -2.193378448486328, "logits/rejected": -2.2005226612091064, "logps/chosen": -0.712138831615448, "logps/rejected": -0.7979795932769775, "loss": 0.72619164, "memory(GiB)": 337.96, "nll_loss": 0.7121387720108032, "rewards/accuracies": 0.75, "rewards/chosen": -0.07121387869119644, "rewards/margins": 0.00858408585190773, "rewards/rejected": -0.07979796826839447, "step": 10, "train_speed(iter/s)": 0.010603 }, { "epoch": 0.033460076045627375, "grad_norm": 0.18145819008350372, "learning_rate": 9.999999999999999e-05, "log_odds_chosen": 0.39647987484931946, "log_odds_ratio": -0.5509629249572754, "logits/chosen": -1.9035614728927612, "logits/rejected": -1.8070275783538818, "logps/chosen": -0.6602268218994141, "logps/rejected": -0.9264889359474182, "loss": 0.63924813, "memory(GiB)": 337.96, "nll_loss": 0.6602268218994141, "rewards/accuracies": 0.75, "rewards/chosen": -0.06602267920970917, "rewards/margins": 0.026626210659742355, "rewards/rejected": -0.09264889359474182, "step": 11, "train_speed(iter/s)": 0.011075 }, { "epoch": 0.03650190114068441, "grad_norm": 0.1996176391839981, "learning_rate": 0.00010909090909090908, "log_odds_chosen": -0.008832626044750214, "log_odds_ratio": -0.6989511251449585, "logits/chosen": -2.1484761238098145, "logits/rejected": -2.1583542823791504, "logps/chosen": -0.5783950686454773, "logps/rejected": -0.5806748867034912, "loss": 0.66453189, "memory(GiB)": 337.96, "nll_loss": 0.5783950686454773, "rewards/accuracies": 0.375, "rewards/chosen": -0.05783950537443161, "rewards/margins": 0.0002279849722981453, "rewards/rejected": -0.05806749314069748, "step": 12, "train_speed(iter/s)": 0.011198 }, { "epoch": 0.03954372623574144, "grad_norm": 0.18517741560935974, "learning_rate": 0.00011818181818181817, "log_odds_chosen": 0.1502607762813568, "log_odds_ratio": -0.6237398386001587, "logits/chosen": -2.078707695007324, "logits/rejected": -2.0867443084716797, "logps/chosen": -0.6652951240539551, "logps/rejected": -0.7532068490982056, "loss": 0.71045852, "memory(GiB)": 337.96, "nll_loss": 0.6652951240539551, "rewards/accuracies": 0.875, "rewards/chosen": -0.0665295198559761, "rewards/margins": 0.008791167289018631, "rewards/rejected": -0.07532068341970444, "step": 13, "train_speed(iter/s)": 0.011144 }, { "epoch": 0.04258555133079848, "grad_norm": 0.460842102766037, "learning_rate": 0.00012727272727272725, "log_odds_chosen": 0.08848065137863159, "log_odds_ratio": -0.6523964405059814, "logits/chosen": -2.0903429985046387, "logits/rejected": -2.071244716644287, "logps/chosen": -0.5602254867553711, "logps/rejected": -0.6098269820213318, "loss": 0.62673962, "memory(GiB)": 337.96, "nll_loss": 0.5602255463600159, "rewards/accuracies": 0.75, "rewards/chosen": -0.05602255091071129, "rewards/margins": 0.004960143007338047, "rewards/rejected": -0.06098269298672676, "step": 14, "train_speed(iter/s)": 0.011223 }, { "epoch": 0.045627376425855515, "grad_norm": 0.201682448387146, "learning_rate": 0.00013636363636363634, "log_odds_chosen": 0.0586659274995327, "log_odds_ratio": -0.6654743552207947, "logits/chosen": -2.460188388824463, "logits/rejected": -2.4563236236572266, "logps/chosen": -0.3543914556503296, "logps/rejected": -0.3674345910549164, "loss": 0.49967062, "memory(GiB)": 337.96, "nll_loss": 0.3543914258480072, "rewards/accuracies": 0.875, "rewards/chosen": -0.0354391448199749, "rewards/margins": 0.0013043158687651157, "rewards/rejected": -0.03674346208572388, "step": 15, "train_speed(iter/s)": 0.011535 }, { "epoch": 0.048669201520912544, "grad_norm": 0.20425710082054138, "learning_rate": 0.00014545454545454546, "log_odds_chosen": 0.06445352733135223, "log_odds_ratio": -0.6619336009025574, "logits/chosen": -1.9317834377288818, "logits/rejected": -1.9417508840560913, "logps/chosen": -0.5264174938201904, "logps/rejected": -0.5525864958763123, "loss": 0.62982225, "memory(GiB)": 337.96, "nll_loss": 0.5264174938201904, "rewards/accuracies": 0.75, "rewards/chosen": -0.05264175310730934, "rewards/margins": 0.0026168967597186565, "rewards/rejected": -0.055258650332689285, "step": 16, "train_speed(iter/s)": 0.011487 }, { "epoch": 0.05171102661596958, "grad_norm": 0.19479066133499146, "learning_rate": 0.00015454545454545452, "log_odds_chosen": 0.05428645759820938, "log_odds_ratio": -0.6680891513824463, "logits/chosen": -2.0513648986816406, "logits/rejected": -2.0547256469726562, "logps/chosen": -0.5574195384979248, "logps/rejected": -0.5769228935241699, "loss": 0.65339994, "memory(GiB)": 337.96, "nll_loss": 0.5574195384979248, "rewards/accuracies": 0.75, "rewards/chosen": -0.05574195832014084, "rewards/margins": 0.001950339414179325, "rewards/rejected": -0.05769229307770729, "step": 17, "train_speed(iter/s)": 0.011193 }, { "epoch": 0.05475285171102662, "grad_norm": 0.20096692442893982, "learning_rate": 0.0001636363636363636, "log_odds_chosen": 0.11649945378303528, "log_odds_ratio": -0.6375795006752014, "logits/chosen": -1.979109764099121, "logits/rejected": -1.9783812761306763, "logps/chosen": -0.6062220335006714, "logps/rejected": -0.6605215072631836, "loss": 0.61762023, "memory(GiB)": 337.96, "nll_loss": 0.6062220931053162, "rewards/accuracies": 0.875, "rewards/chosen": -0.0606222040951252, "rewards/margins": 0.005429944489151239, "rewards/rejected": -0.06605214625597, "step": 18, "train_speed(iter/s)": 0.010973 }, { "epoch": 0.05779467680608365, "grad_norm": 0.22299402952194214, "learning_rate": 0.00017272727272727272, "log_odds_chosen": 0.275879442691803, "log_odds_ratio": -0.5872041583061218, "logits/chosen": -1.7563273906707764, "logits/rejected": -1.7962664365768433, "logps/chosen": -0.5380970239639282, "logps/rejected": -0.7168144583702087, "loss": 0.62556648, "memory(GiB)": 337.96, "nll_loss": 0.5380970239639282, "rewards/accuracies": 0.75, "rewards/chosen": -0.05380970239639282, "rewards/margins": 0.017871743068099022, "rewards/rejected": -0.071681447327137, "step": 19, "train_speed(iter/s)": 0.010995 }, { "epoch": 0.060836501901140684, "grad_norm": 0.19399508833885193, "learning_rate": 0.0001818181818181818, "log_odds_chosen": -0.016166899353265762, "log_odds_ratio": -0.702415943145752, "logits/chosen": -2.1521270275115967, "logits/rejected": -2.151902914047241, "logps/chosen": -0.41145703196525574, "logps/rejected": -0.4066007733345032, "loss": 0.49458012, "memory(GiB)": 337.96, "nll_loss": 0.41145703196525574, "rewards/accuracies": 0.375, "rewards/chosen": -0.041145700961351395, "rewards/margins": -0.000485624885186553, "rewards/rejected": -0.040660079568624496, "step": 20, "train_speed(iter/s)": 0.011065 } ], "logging_steps": 1, "max_steps": 328, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 10, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 9.570806552906957e+17, "train_batch_size": 1, "trial_name": null, "trial_params": null }