| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.9984, |
| "eval_steps": 100, |
| "global_step": 156, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.01, |
| "learning_rate": 3.125e-07, |
| "logits/chosen": -2.72961163520813, |
| "logits/rejected": -2.7527058124542236, |
| "logps/chosen": -133.97433471679688, |
| "logps/rejected": -138.8169403076172, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.0, |
| "rewards/chosen": 0.0, |
| "rewards/margins": 0.0, |
| "rewards/rejected": 0.0, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 3.125e-06, |
| "logits/chosen": -2.7357263565063477, |
| "logits/rejected": -2.7272207736968994, |
| "logps/chosen": -115.04521942138672, |
| "logps/rejected": -114.19779205322266, |
| "loss": 0.6932, |
| "rewards/accuracies": 0.3888888955116272, |
| "rewards/chosen": 0.0004528095596469939, |
| "rewards/margins": -0.000532312027644366, |
| "rewards/rejected": 0.00098512158729136, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 4.989935734988098e-06, |
| "logits/chosen": -2.721278667449951, |
| "logits/rejected": -2.7116167545318604, |
| "logps/chosen": -111.3840103149414, |
| "logps/rejected": -116.3367691040039, |
| "loss": 0.6927, |
| "rewards/accuracies": 0.5218750238418579, |
| "rewards/chosen": -0.014203068800270557, |
| "rewards/margins": 0.0006986708613112569, |
| "rewards/rejected": -0.014901740476489067, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 4.8776412907378845e-06, |
| "logits/chosen": -2.742332696914673, |
| "logits/rejected": -2.7299842834472656, |
| "logps/chosen": -123.17195129394531, |
| "logps/rejected": -122.3455581665039, |
| "loss": 0.6925, |
| "rewards/accuracies": 0.49687498807907104, |
| "rewards/chosen": -0.09704665839672089, |
| "rewards/margins": 0.00010085676331073046, |
| "rewards/rejected": -0.09714751690626144, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 4.646121984004666e-06, |
| "logits/chosen": -2.700500965118408, |
| "logits/rejected": -2.673189878463745, |
| "logps/chosen": -116.55732727050781, |
| "logps/rejected": -118.69517517089844, |
| "loss": 0.6924, |
| "rewards/accuracies": 0.49687498807907104, |
| "rewards/chosen": -0.08498911559581757, |
| "rewards/margins": 5.4714873840566725e-05, |
| "rewards/rejected": -0.08504383265972137, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.32, |
| "learning_rate": 4.3069871595684795e-06, |
| "logits/chosen": -2.740999698638916, |
| "logits/rejected": -2.711369514465332, |
| "logps/chosen": -111.51325988769531, |
| "logps/rejected": -118.1547622680664, |
| "loss": 0.6904, |
| "rewards/accuracies": 0.543749988079071, |
| "rewards/chosen": -0.007794947363436222, |
| "rewards/margins": 0.005682565737515688, |
| "rewards/rejected": -0.013477511703968048, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.38, |
| "learning_rate": 3.8772424536302565e-06, |
| "logits/chosen": -2.682302236557007, |
| "logits/rejected": -2.660250425338745, |
| "logps/chosen": -119.29142761230469, |
| "logps/rejected": -123.52491760253906, |
| "loss": 0.6896, |
| "rewards/accuracies": 0.550000011920929, |
| "rewards/chosen": -0.05805445462465286, |
| "rewards/margins": 0.008577173575758934, |
| "rewards/rejected": -0.06663163006305695, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.45, |
| "learning_rate": 3.3784370602033572e-06, |
| "logits/chosen": -2.6593658924102783, |
| "logits/rejected": -2.635847568511963, |
| "logps/chosen": -110.70475769042969, |
| "logps/rejected": -111.5876693725586, |
| "loss": 0.6896, |
| "rewards/accuracies": 0.578125, |
| "rewards/chosen": -0.05531386658549309, |
| "rewards/margins": 0.013766427524387836, |
| "rewards/rejected": -0.06908029317855835, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.51, |
| "learning_rate": 2.835583164544139e-06, |
| "logits/chosen": -2.6447205543518066, |
| "logits/rejected": -2.6153109073638916, |
| "logps/chosen": -116.82215881347656, |
| "logps/rejected": -122.38499450683594, |
| "loss": 0.6888, |
| "rewards/accuracies": 0.550000011920929, |
| "rewards/chosen": -0.044131264090538025, |
| "rewards/margins": 0.011304137296974659, |
| "rewards/rejected": -0.05543540045619011, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.58, |
| "learning_rate": 2.2759017277414165e-06, |
| "logits/chosen": -2.6791300773620605, |
| "logits/rejected": -2.670039653778076, |
| "logps/chosen": -137.33778381347656, |
| "logps/rejected": -134.17361450195312, |
| "loss": 0.6871, |
| "rewards/accuracies": 0.559374988079071, |
| "rewards/chosen": -0.0856749638915062, |
| "rewards/margins": 0.015940625220537186, |
| "rewards/rejected": -0.10161559283733368, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.64, |
| "learning_rate": 1.7274575140626318e-06, |
| "logits/chosen": -2.6628499031066895, |
| "logits/rejected": -2.6339688301086426, |
| "logps/chosen": -124.69636535644531, |
| "logps/rejected": -118.883544921875, |
| "loss": 0.6886, |
| "rewards/accuracies": 0.543749988079071, |
| "rewards/chosen": -0.03587063401937485, |
| "rewards/margins": 0.011753683909773827, |
| "rewards/rejected": -0.047624316066503525, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.64, |
| "eval_logits/chosen": -2.6271259784698486, |
| "eval_logits/rejected": -2.5384714603424072, |
| "eval_logps/chosen": -288.5429382324219, |
| "eval_logps/rejected": -268.43902587890625, |
| "eval_loss": 0.678156852722168, |
| "eval_rewards/accuracies": 0.6100000143051147, |
| "eval_rewards/chosen": -0.06864660233259201, |
| "eval_rewards/margins": 0.02844993770122528, |
| "eval_rewards/rejected": -0.0970965251326561, |
| "eval_runtime": 383.7648, |
| "eval_samples_per_second": 5.212, |
| "eval_steps_per_second": 0.651, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.7, |
| "learning_rate": 1.217751806485235e-06, |
| "logits/chosen": -2.6874401569366455, |
| "logits/rejected": -2.6627275943756104, |
| "logps/chosen": -120.39668273925781, |
| "logps/rejected": -122.85832214355469, |
| "loss": 0.6876, |
| "rewards/accuracies": 0.5531250238418579, |
| "rewards/chosen": -0.04335067793726921, |
| "rewards/margins": 0.012038113549351692, |
| "rewards/rejected": -0.055388789623975754, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.77, |
| "learning_rate": 7.723433775328385e-07, |
| "logits/chosen": -2.6583964824676514, |
| "logits/rejected": -2.6295745372772217, |
| "logps/chosen": -124.8127670288086, |
| "logps/rejected": -123.85284423828125, |
| "loss": 0.6869, |
| "rewards/accuracies": 0.6156250238418579, |
| "rewards/chosen": -0.060910262167453766, |
| "rewards/margins": 0.01821967028081417, |
| "rewards/rejected": -0.07912993431091309, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.83, |
| "learning_rate": 4.1356686569674344e-07, |
| "logits/chosen": -2.6148152351379395, |
| "logits/rejected": -2.5909037590026855, |
| "logps/chosen": -116.5543212890625, |
| "logps/rejected": -121.72233581542969, |
| "loss": 0.6867, |
| "rewards/accuracies": 0.53125, |
| "rewards/chosen": -0.08463772386312485, |
| "rewards/margins": 0.020932147279381752, |
| "rewards/rejected": -0.10556988418102264, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.9, |
| "learning_rate": 1.59412823400657e-07, |
| "logits/chosen": -2.605799674987793, |
| "logits/rejected": -2.5549862384796143, |
| "logps/chosen": -116.57108306884766, |
| "logps/rejected": -122.12138366699219, |
| "loss": 0.6885, |
| "rewards/accuracies": 0.5718749761581421, |
| "rewards/chosen": -0.05592598766088486, |
| "rewards/margins": 0.015123754739761353, |
| "rewards/rejected": -0.07104974240064621, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.96, |
| "learning_rate": 2.262559558016325e-08, |
| "logits/chosen": -2.643202066421509, |
| "logits/rejected": -2.605377674102783, |
| "logps/chosen": -114.8644027709961, |
| "logps/rejected": -112.22537994384766, |
| "loss": 0.6854, |
| "rewards/accuracies": 0.5843750238418579, |
| "rewards/chosen": -0.0501478835940361, |
| "rewards/margins": 0.02834610641002655, |
| "rewards/rejected": -0.07849399000406265, |
| "step": 150 |
| }, |
| { |
| "epoch": 1.0, |
| "step": 156, |
| "total_flos": 0.0, |
| "train_loss": 0.6890946939969674, |
| "train_runtime": 6305.2604, |
| "train_samples_per_second": 3.172, |
| "train_steps_per_second": 0.025 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 156, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 100, |
| "total_flos": 0.0, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|