{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.986666666666667,
  "eval_steps": 500,
  "global_step": 168,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.17777777777777778,
      "grad_norm": 3.8474488258361816,
      "learning_rate": 2.9411764705882355e-06,
      "logits/chosen": -2.332357883453369,
      "logits/rejected": -2.368460178375244,
      "logps/chosen": -1.2429828643798828,
      "logps/rejected": -1.659155249595642,
      "loss": 1.2935,
      "odds_ratio_loss": 11.457437515258789,
      "rewards/accuracies": 0.800000011920929,
      "rewards/chosen": -0.12429828941822052,
      "rewards/margins": 0.04161724075675011,
      "rewards/rejected": -0.16591551899909973,
      "sft_loss": 0.14774402976036072,
      "step": 10
    },
    {
      "epoch": 0.35555555555555557,
      "grad_norm": 5.139791965484619,
      "learning_rate": 4.995131923687488e-06,
      "logits/chosen": -2.292804002761841,
      "logits/rejected": -2.327223300933838,
      "logps/chosen": -1.2883718013763428,
      "logps/rejected": -1.7239799499511719,
      "loss": 1.337,
      "odds_ratio_loss": 11.885274887084961,
      "rewards/accuracies": 0.831250011920929,
      "rewards/chosen": -0.12883718311786652,
      "rewards/margins": 0.043560806661844254,
      "rewards/rejected": -0.17239800095558167,
      "sft_loss": 0.14851602911949158,
      "step": 20
    },
    {
      "epoch": 0.5333333333333333,
      "grad_norm": 5.0462751388549805,
      "learning_rate": 4.90911473983908e-06,
      "logits/chosen": -2.3472771644592285,
      "logits/rejected": -2.3845486640930176,
      "logps/chosen": -1.186187982559204,
      "logps/rejected": -1.59175705909729,
      "loss": 1.2375,
      "odds_ratio_loss": 10.995233535766602,
      "rewards/accuracies": 0.8187500238418579,
      "rewards/chosen": -0.11861880123615265,
      "rewards/margins": 0.04055692255496979,
      "rewards/rejected": -0.15917572379112244,
      "sft_loss": 0.13796505331993103,
      "step": 30
    },
    {
      "epoch": 0.7111111111111111,
      "grad_norm": 3.613401412963867,
      "learning_rate": 4.71919261421297e-06,
      "logits/chosen": -2.370697021484375,
      "logits/rejected": -2.4233319759368896,
      "logps/chosen": -0.9008461236953735,
      "logps/rejected": -1.4053981304168701,
      "loss": 0.9426,
      "odds_ratio_loss": 8.488945007324219,
      "rewards/accuracies": 0.8687499761581421,
      "rewards/chosen": -0.09008461236953735,
      "rewards/margins": 0.05045522004365921,
      "rewards/rejected": -0.14053983986377716,
      "sft_loss": 0.09369887411594391,
      "step": 40
    },
    {
      "epoch": 0.8888888888888888,
      "grad_norm": 3.07717227935791,
      "learning_rate": 4.43355687413747e-06,
      "logits/chosen": -2.4286742210388184,
      "logits/rejected": -2.4549202919006348,
      "logps/chosen": -0.6461768746376038,
      "logps/rejected": -1.0805357694625854,
      "loss": 0.6945,
      "odds_ratio_loss": 6.29970645904541,
      "rewards/accuracies": 0.824999988079071,
      "rewards/chosen": -0.06461768597364426,
      "rewards/margins": 0.04343589395284653,
      "rewards/rejected": -0.10805357992649078,
      "sft_loss": 0.06456876546144485,
      "step": 50
    },
    {
      "epoch": 1.0666666666666667,
      "grad_norm": 2.27549409866333,
      "learning_rate": 4.064526968101844e-06,
      "logits/chosen": -2.317702531814575,
      "logits/rejected": -2.337463855743408,
      "logps/chosen": -0.5583322048187256,
      "logps/rejected": -1.0102574825286865,
      "loss": 0.6081,
      "odds_ratio_loss": 5.508663177490234,
      "rewards/accuracies": 0.7562500238418579,
      "rewards/chosen": -0.05583322048187256,
      "rewards/margins": 0.04519252851605415,
      "rewards/rejected": -0.10102574527263641,
      "sft_loss": 0.05719046667218208,
      "step": 60
    },
    {
      "epoch": 1.2444444444444445,
      "grad_norm": 2.98760724067688,
      "learning_rate": 3.6280191288478437e-06,
      "logits/chosen": -2.4034271240234375,
      "logits/rejected": -2.4294583797454834,
      "logps/chosen": -0.35430365800857544,
      "logps/rejected": -0.7878178358078003,
      "loss": 0.3968,
      "odds_ratio_loss": 3.5909438133239746,
      "rewards/accuracies": 0.824999988079071,
      "rewards/chosen": -0.035430364310741425,
      "rewards/margins": 0.0433514229953289,
      "rewards/rejected": -0.07878179103136063,
      "sft_loss": 0.037743426859378815,
      "step": 70
    },
    {
      "epoch": 1.4222222222222223,
      "grad_norm": 2.200446605682373,
      "learning_rate": 3.142859907420615e-06,
      "logits/chosen": -2.336760997772217,
      "logits/rejected": -2.365061044692993,
      "logps/chosen": -0.2458254098892212,
      "logps/rejected": -0.6315831542015076,
      "loss": 0.2882,
      "odds_ratio_loss": 2.5942039489746094,
      "rewards/accuracies": 0.831250011920929,
      "rewards/chosen": -0.02458254061639309,
      "rewards/margins": 0.03857577592134476,
      "rewards/rejected": -0.063158318400383,
      "sft_loss": 0.028748363256454468,
      "step": 80
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.6536734104156494,
      "learning_rate": 2.629974185404951e-06,
      "logits/chosen": -2.313284397125244,
      "logits/rejected": -2.3488173484802246,
      "logps/chosen": -0.1942504495382309,
      "logps/rejected": -0.6369145512580872,
      "loss": 0.2406,
      "odds_ratio_loss": 2.1658334732055664,
      "rewards/accuracies": 0.793749988079071,
      "rewards/chosen": -0.01942504197359085,
      "rewards/margins": 0.04426640644669533,
      "rewards/rejected": -0.06369145214557648,
      "sft_loss": 0.023997236043214798,
      "step": 90
    },
    {
      "epoch": 1.7777777777777777,
      "grad_norm": 1.2765487432479858,
      "learning_rate": 2.1114826863194882e-06,
      "logits/chosen": -2.2804689407348633,
      "logits/rejected": -2.3144404888153076,
      "logps/chosen": -0.13879844546318054,
      "logps/rejected": -0.6243221163749695,
      "loss": 0.1759,
      "odds_ratio_loss": 1.5965832471847534,
      "rewards/accuracies": 0.862500011920929,
      "rewards/chosen": -0.013879844918847084,
      "rewards/margins": 0.048552367836236954,
      "rewards/rejected": -0.06243220716714859,
      "sft_loss": 0.01626196689903736,
      "step": 100
    },
    {
      "epoch": 1.9555555555555557,
      "grad_norm": 1.2211233377456665,
      "learning_rate": 1.6097479104361328e-06,
      "logits/chosen": -2.317593812942505,
      "logits/rejected": -2.336158037185669,
      "logps/chosen": -0.1410079300403595,
      "logps/rejected": -0.5155030488967896,
      "loss": 0.1836,
      "odds_ratio_loss": 1.6868371963500977,
      "rewards/accuracies": 0.793749988079071,
      "rewards/chosen": -0.014100792817771435,
      "rewards/margins": 0.037449512630701065,
      "rewards/rejected": -0.051550306379795074,
      "sft_loss": 0.01492035947740078,
      "step": 110
    },
    {
      "epoch": 2.1333333333333333,
      "grad_norm": 1.3183128833770752,
      "learning_rate": 1.1464096417858821e-06,
      "logits/chosen": -2.290233850479126,
      "logits/rejected": -2.3088765144348145,
      "logps/chosen": -0.10447756201028824,
      "logps/rejected": -0.6317979097366333,
      "loss": 0.1326,
      "odds_ratio_loss": 1.1886184215545654,
      "rewards/accuracies": 0.887499988079071,
      "rewards/chosen": -0.010447757318615913,
      "rewards/margins": 0.052732039242982864,
      "rewards/rejected": -0.06317979097366333,
      "sft_loss": 0.013712344691157341,
      "step": 120
    },
    {
      "epoch": 2.311111111111111,
      "grad_norm": 1.7163466215133667,
      "learning_rate": 7.414516258630245e-07,
      "logits/chosen": -2.262594223022461,
      "logits/rejected": -2.284545421600342,
      "logps/chosen": -0.12122899293899536,
      "logps/rejected": -0.5315740704536438,
      "loss": 0.1618,
      "odds_ratio_loss": 1.4604320526123047,
      "rewards/accuracies": 0.8062499761581421,
      "rewards/chosen": -0.012122899293899536,
      "rewards/margins": 0.0410345084965229,
      "rewards/rejected": -0.05315741151571274,
      "sft_loss": 0.015804503113031387,
      "step": 130
    },
    {
      "epoch": 2.488888888888889,
      "grad_norm": 3.0588433742523193,
      "learning_rate": 4.123396721497977e-07,
      "logits/chosen": -2.3231940269470215,
      "logits/rejected": -2.34096622467041,
      "logps/chosen": -0.12840591371059418,
      "logps/rejected": -0.5461179614067078,
      "loss": 0.1688,
      "odds_ratio_loss": 1.521639108657837,
      "rewards/accuracies": 0.831250011920929,
      "rewards/chosen": -0.012840591371059418,
      "rewards/margins": 0.04177120327949524,
      "rewards/rejected": -0.05461179465055466,
      "sft_loss": 0.016592377796769142,
      "step": 140
    },
    {
      "epoch": 2.6666666666666665,
      "grad_norm": 2.731790065765381,
      "learning_rate": 1.7326835503629542e-07,
      "logits/chosen": -2.2994039058685303,
      "logits/rejected": -2.324022054672241,
      "logps/chosen": -0.12199757248163223,
      "logps/rejected": -0.5101950168609619,
      "loss": 0.1648,
      "odds_ratio_loss": 1.4926129579544067,
      "rewards/accuracies": 0.8062499761581421,
      "rewards/chosen": -0.012199757620692253,
      "rewards/margins": 0.03881974145770073,
      "rewards/rejected": -0.05101950094103813,
      "sft_loss": 0.015529977157711983,
      "step": 150
    },
    {
      "epoch": 2.8444444444444446,
      "grad_norm": 1.6209518909454346,
      "learning_rate": 3.4548802869627806e-08,
      "logits/chosen": -2.19964599609375,
      "logits/rejected": -2.222959041595459,
      "logps/chosen": -0.11084076017141342,
      "logps/rejected": -0.5317128300666809,
      "loss": 0.1456,
      "odds_ratio_loss": 1.3233401775360107,
      "rewards/accuracies": 0.824999988079071,
      "rewards/chosen": -0.011084076017141342,
      "rewards/margins": 0.042087212204933167,
      "rewards/rejected": -0.05317128449678421,
      "sft_loss": 0.013311339542269707,
      "step": 160
    },
    {
      "epoch": 2.986666666666667,
      "step": 168,
      "total_flos": 4.410496642646016e+16,
      "train_loss": 0.4940077399923688,
      "train_runtime": 519.2548,
      "train_samples_per_second": 5.2,
      "train_steps_per_second": 0.324
    }
  ],
  "logging_steps": 10,
  "max_steps": 168,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 4.410496642646016e+16,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}