| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.9984, |
| "eval_steps": 100, |
| "global_step": 156, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.01, |
| "learning_rate": 3.125e-07, |
| "logits/chosen": -2.89351749420166, |
| "logits/rejected": -2.7752203941345215, |
| "logps/chosen": -345.7324523925781, |
| "logps/rejected": -319.42047119140625, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.0, |
| "rewards/chosen": 0.0, |
| "rewards/margins": 0.0, |
| "rewards/rejected": 0.0, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 3.125e-06, |
| "logits/chosen": -2.8028833866119385, |
| "logits/rejected": -2.7471988201141357, |
| "logps/chosen": -255.036865234375, |
| "logps/rejected": -252.82679748535156, |
| "loss": 0.6928, |
| "rewards/accuracies": 0.4583333432674408, |
| "rewards/chosen": 0.0003066221543122083, |
| "rewards/margins": 0.0006635435856878757, |
| "rewards/rejected": -0.0003569214604794979, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 4.989935734988098e-06, |
| "logits/chosen": -2.7710824012756348, |
| "logits/rejected": -2.7166454792022705, |
| "logps/chosen": -277.1798400878906, |
| "logps/rejected": -256.997802734375, |
| "loss": 0.6889, |
| "rewards/accuracies": 0.65625, |
| "rewards/chosen": 0.012738336808979511, |
| "rewards/margins": 0.008854442276060581, |
| "rewards/rejected": 0.00388389453291893, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 4.8776412907378845e-06, |
| "logits/chosen": -2.7218027114868164, |
| "logits/rejected": -2.658277750015259, |
| "logps/chosen": -274.3503112792969, |
| "logps/rejected": -246.41128540039062, |
| "loss": 0.6795, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": 0.03419749438762665, |
| "rewards/margins": 0.027278240770101547, |
| "rewards/rejected": 0.006919251289218664, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 4.646121984004666e-06, |
| "logits/chosen": -2.7631821632385254, |
| "logits/rejected": -2.6550638675689697, |
| "logps/chosen": -268.45220947265625, |
| "logps/rejected": -251.58743286132812, |
| "loss": 0.6693, |
| "rewards/accuracies": 0.715624988079071, |
| "rewards/chosen": 0.03486743941903114, |
| "rewards/margins": 0.05773182958364487, |
| "rewards/rejected": -0.022864393889904022, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.32, |
| "learning_rate": 4.3069871595684795e-06, |
| "logits/chosen": -2.676445960998535, |
| "logits/rejected": -2.62298321723938, |
| "logps/chosen": -274.287841796875, |
| "logps/rejected": -281.6821594238281, |
| "loss": 0.6605, |
| "rewards/accuracies": 0.671875, |
| "rewards/chosen": -0.003023784141987562, |
| "rewards/margins": 0.07856948673725128, |
| "rewards/rejected": -0.08159326761960983, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.38, |
| "learning_rate": 3.8772424536302565e-06, |
| "logits/chosen": -2.687849521636963, |
| "logits/rejected": -2.6154909133911133, |
| "logps/chosen": -287.0888366699219, |
| "logps/rejected": -274.48822021484375, |
| "loss": 0.6507, |
| "rewards/accuracies": 0.715624988079071, |
| "rewards/chosen": -0.03279999643564224, |
| "rewards/margins": 0.11418493092060089, |
| "rewards/rejected": -0.14698493480682373, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.45, |
| "learning_rate": 3.3784370602033572e-06, |
| "logits/chosen": -2.6265475749969482, |
| "logits/rejected": -2.570312976837158, |
| "logps/chosen": -304.10125732421875, |
| "logps/rejected": -292.9505310058594, |
| "loss": 0.6431, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": -0.033603884279727936, |
| "rewards/margins": 0.12847770750522614, |
| "rewards/rejected": -0.16208159923553467, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.51, |
| "learning_rate": 2.835583164544139e-06, |
| "logits/chosen": -2.6333694458007812, |
| "logits/rejected": -2.5370612144470215, |
| "logps/chosen": -302.35736083984375, |
| "logps/rejected": -276.11175537109375, |
| "loss": 0.634, |
| "rewards/accuracies": 0.6812499761581421, |
| "rewards/chosen": -0.04339645802974701, |
| "rewards/margins": 0.14248773455619812, |
| "rewards/rejected": -0.18588420748710632, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.58, |
| "learning_rate": 2.2759017277414165e-06, |
| "logits/chosen": -2.584850311279297, |
| "logits/rejected": -2.5956850051879883, |
| "logps/chosen": -292.6184997558594, |
| "logps/rejected": -311.091796875, |
| "loss": 0.6249, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": -0.09801232814788818, |
| "rewards/margins": 0.17943526804447174, |
| "rewards/rejected": -0.27744758129119873, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.64, |
| "learning_rate": 1.7274575140626318e-06, |
| "logits/chosen": -2.6319050788879395, |
| "logits/rejected": -2.5922951698303223, |
| "logps/chosen": -290.19976806640625, |
| "logps/rejected": -273.79815673828125, |
| "loss": 0.6297, |
| "rewards/accuracies": 0.643750011920929, |
| "rewards/chosen": -0.1279720962047577, |
| "rewards/margins": 0.16270975768566132, |
| "rewards/rejected": -0.2906818985939026, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.64, |
| "eval_logits/chosen": -2.6261305809020996, |
| "eval_logits/rejected": -2.541619062423706, |
| "eval_logps/chosen": -295.3544006347656, |
| "eval_logps/rejected": -286.08172607421875, |
| "eval_loss": 0.6284892559051514, |
| "eval_rewards/accuracies": 0.699999988079071, |
| "eval_rewards/chosen": -0.11314628273248672, |
| "eval_rewards/margins": 0.1725120097398758, |
| "eval_rewards/rejected": -0.2856582701206207, |
| "eval_runtime": 382.1442, |
| "eval_samples_per_second": 5.234, |
| "eval_steps_per_second": 0.654, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.7, |
| "learning_rate": 1.217751806485235e-06, |
| "logits/chosen": -2.6338038444519043, |
| "logits/rejected": -2.5288445949554443, |
| "logps/chosen": -289.0359802246094, |
| "logps/rejected": -275.2894287109375, |
| "loss": 0.6177, |
| "rewards/accuracies": 0.684374988079071, |
| "rewards/chosen": -0.06765095144510269, |
| "rewards/margins": 0.20255950093269348, |
| "rewards/rejected": -0.2702104449272156, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.77, |
| "learning_rate": 7.723433775328385e-07, |
| "logits/chosen": -2.552917003631592, |
| "logits/rejected": -2.550574779510498, |
| "logps/chosen": -274.2412109375, |
| "logps/rejected": -288.6865539550781, |
| "loss": 0.6251, |
| "rewards/accuracies": 0.6812499761581421, |
| "rewards/chosen": -0.10379727929830551, |
| "rewards/margins": 0.1611437350511551, |
| "rewards/rejected": -0.2649410367012024, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.83, |
| "learning_rate": 4.1356686569674344e-07, |
| "logits/chosen": -2.611356735229492, |
| "logits/rejected": -2.5477137565612793, |
| "logps/chosen": -291.66290283203125, |
| "logps/rejected": -301.75537109375, |
| "loss": 0.6233, |
| "rewards/accuracies": 0.690625011920929, |
| "rewards/chosen": -0.11176357418298721, |
| "rewards/margins": 0.20609335601329803, |
| "rewards/rejected": -0.31785690784454346, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.9, |
| "learning_rate": 1.59412823400657e-07, |
| "logits/chosen": -2.6064445972442627, |
| "logits/rejected": -2.479429244995117, |
| "logps/chosen": -313.6204833984375, |
| "logps/rejected": -287.5814514160156, |
| "loss": 0.6146, |
| "rewards/accuracies": 0.684374988079071, |
| "rewards/chosen": -0.09765791893005371, |
| "rewards/margins": 0.21785131096839905, |
| "rewards/rejected": -0.31550922989845276, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.96, |
| "learning_rate": 2.262559558016325e-08, |
| "logits/chosen": -2.58495831489563, |
| "logits/rejected": -2.4780099391937256, |
| "logps/chosen": -300.6700134277344, |
| "logps/rejected": -281.14935302734375, |
| "loss": 0.6193, |
| "rewards/accuracies": 0.746874988079071, |
| "rewards/chosen": -0.11531106382608414, |
| "rewards/margins": 0.22770515084266663, |
| "rewards/rejected": -0.34301620721817017, |
| "step": 150 |
| }, |
| { |
| "epoch": 1.0, |
| "step": 156, |
| "total_flos": 0.0, |
| "train_loss": 0.6446946973984058, |
| "train_runtime": 7153.8565, |
| "train_samples_per_second": 2.796, |
| "train_steps_per_second": 0.022 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 156, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 100, |
| "total_flos": 0.0, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|