| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.9968652037617555, |
| "eval_steps": 500, |
| "global_step": 159, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.01, |
| "learning_rate": 3.125e-08, |
| "logits/chosen": -2.0781588554382324, |
| "logits/rejected": -1.9858250617980957, |
| "logps/chosen": -453.2818603515625, |
| "logps/pi_response": -300.6171875, |
| "logps/ref_response": -300.6171875, |
| "logps/rejected": -393.94146728515625, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.0, |
| "rewards/chosen": 0.0, |
| "rewards/margins": 0.0, |
| "rewards/rejected": 0.0, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 3.1249999999999997e-07, |
| "logits/chosen": -2.102860689163208, |
| "logits/rejected": -2.033174753189087, |
| "logps/chosen": -286.9140930175781, |
| "logps/pi_response": -192.1187744140625, |
| "logps/ref_response": -192.19212341308594, |
| "logps/rejected": -342.8356018066406, |
| "loss": 0.6925, |
| "rewards/accuracies": 0.4513888955116272, |
| "rewards/chosen": -0.0006262523238547146, |
| "rewards/margins": 0.0011568003101274371, |
| "rewards/rejected": -0.0017830526921898127, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 4.990353313429303e-07, |
| "logits/chosen": -1.9538593292236328, |
| "logits/rejected": -1.9250879287719727, |
| "logps/chosen": -318.6830749511719, |
| "logps/pi_response": -219.6834716796875, |
| "logps/ref_response": -184.64120483398438, |
| "logps/rejected": -481.122802734375, |
| "loss": 0.6773, |
| "rewards/accuracies": 0.5562499761581421, |
| "rewards/chosen": -0.06833034753799438, |
| "rewards/margins": 0.036504171788692474, |
| "rewards/rejected": -0.10483451187610626, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 4.882681251368548e-07, |
| "logits/chosen": -1.4626009464263916, |
| "logits/rejected": -1.3391458988189697, |
| "logps/chosen": -680.2674560546875, |
| "logps/pi_response": -502.0758361816406, |
| "logps/ref_response": -192.75753784179688, |
| "logps/rejected": -882.34033203125, |
| "loss": 0.6418, |
| "rewards/accuracies": 0.543749988079071, |
| "rewards/chosen": -0.39676880836486816, |
| "rewards/margins": 0.1544235348701477, |
| "rewards/rejected": -0.5511924028396606, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 4.6604720940421207e-07, |
| "logits/chosen": -1.0197283029556274, |
| "logits/rejected": -0.6147260069847107, |
| "logps/chosen": -553.42333984375, |
| "logps/pi_response": -358.3141784667969, |
| "logps/ref_response": -189.3666534423828, |
| "logps/rejected": -931.6701049804688, |
| "loss": 0.64, |
| "rewards/accuracies": 0.612500011920929, |
| "rewards/chosen": -0.2808569073677063, |
| "rewards/margins": 0.278983473777771, |
| "rewards/rejected": -0.5598403811454773, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.31, |
| "learning_rate": 4.3344075855595097e-07, |
| "logits/chosen": -0.4955853521823883, |
| "logits/rejected": 0.25757771730422974, |
| "logps/chosen": -521.3439331054688, |
| "logps/pi_response": -355.96917724609375, |
| "logps/ref_response": -208.11865234375, |
| "logps/rejected": -1041.2548828125, |
| "loss": 0.6254, |
| "rewards/accuracies": 0.6625000238418579, |
| "rewards/chosen": -0.23494353890419006, |
| "rewards/margins": 0.3884660601615906, |
| "rewards/rejected": -0.6234095692634583, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.38, |
| "learning_rate": 3.920161866827889e-07, |
| "logits/chosen": 0.10438306629657745, |
| "logits/rejected": 0.6441744565963745, |
| "logps/chosen": -657.8551025390625, |
| "logps/pi_response": -464.1085510253906, |
| "logps/ref_response": -197.88754272460938, |
| "logps/rejected": -1158.5789794921875, |
| "loss": 0.6116, |
| "rewards/accuracies": 0.6499999761581421, |
| "rewards/chosen": -0.38527947664260864, |
| "rewards/margins": 0.38742250204086304, |
| "rewards/rejected": -0.7727020382881165, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.44, |
| "learning_rate": 3.4376480090239047e-07, |
| "logits/chosen": -0.016553742811083794, |
| "logits/rejected": 0.8143717050552368, |
| "logps/chosen": -652.5902709960938, |
| "logps/pi_response": -460.16033935546875, |
| "logps/ref_response": -180.25257873535156, |
| "logps/rejected": -1217.8525390625, |
| "loss": 0.6043, |
| "rewards/accuracies": 0.637499988079071, |
| "rewards/chosen": -0.3822382688522339, |
| "rewards/margins": 0.471721351146698, |
| "rewards/rejected": -0.8539595603942871, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.5, |
| "learning_rate": 2.910060778827554e-07, |
| "logits/chosen": -0.27121174335479736, |
| "logits/rejected": 0.5390032529830933, |
| "logps/chosen": -682.1510009765625, |
| "logps/pi_response": -454.86712646484375, |
| "logps/ref_response": -201.3350372314453, |
| "logps/rejected": -1071.9420166015625, |
| "loss": 0.6021, |
| "rewards/accuracies": 0.5249999761581421, |
| "rewards/chosen": -0.36673516035079956, |
| "rewards/margins": 0.34540173411369324, |
| "rewards/rejected": -0.7121368646621704, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.56, |
| "learning_rate": 2.3627616503391812e-07, |
| "logits/chosen": 0.09567543864250183, |
| "logits/rejected": 1.071440577507019, |
| "logps/chosen": -664.4251708984375, |
| "logps/pi_response": -515.3280029296875, |
| "logps/ref_response": -203.6631317138672, |
| "logps/rejected": -1307.692138671875, |
| "loss": 0.5966, |
| "rewards/accuracies": 0.675000011920929, |
| "rewards/chosen": -0.3908023238182068, |
| "rewards/margins": 0.5106150507926941, |
| "rewards/rejected": -0.9014174342155457, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.63, |
| "learning_rate": 1.8220596619089573e-07, |
| "logits/chosen": 0.02365037612617016, |
| "logits/rejected": 0.7325735092163086, |
| "logps/chosen": -522.3504638671875, |
| "logps/pi_response": -380.5610656738281, |
| "logps/ref_response": -213.95852661132812, |
| "logps/rejected": -976.3234252929688, |
| "loss": 0.6158, |
| "rewards/accuracies": 0.668749988079071, |
| "rewards/chosen": -0.24089224636554718, |
| "rewards/margins": 0.35071635246276855, |
| "rewards/rejected": -0.5916085839271545, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.69, |
| "learning_rate": 1.3139467229135998e-07, |
| "logits/chosen": 0.14913706481456757, |
| "logits/rejected": 1.051020860671997, |
| "logps/chosen": -584.4959106445312, |
| "logps/pi_response": -380.89190673828125, |
| "logps/ref_response": -197.9862823486328, |
| "logps/rejected": -1066.765380859375, |
| "loss": 0.6126, |
| "rewards/accuracies": 0.65625, |
| "rewards/chosen": -0.2975122034549713, |
| "rewards/margins": 0.3768185079097748, |
| "rewards/rejected": -0.6743307113647461, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.75, |
| "learning_rate": 8.628481651367875e-08, |
| "logits/chosen": 0.6599678993225098, |
| "logits/rejected": 1.237850546836853, |
| "logps/chosen": -677.4052124023438, |
| "logps/pi_response": -448.3097229003906, |
| "logps/ref_response": -183.84140014648438, |
| "logps/rejected": -1206.931884765625, |
| "loss": 0.5783, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -0.4090994894504547, |
| "rewards/margins": 0.4326961040496826, |
| "rewards/rejected": -0.8417955636978149, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.82, |
| "learning_rate": 4.904486005914027e-08, |
| "logits/chosen": 0.6546910405158997, |
| "logits/rejected": 1.1750857830047607, |
| "logps/chosen": -641.6511840820312, |
| "logps/pi_response": -466.5652770996094, |
| "logps/ref_response": -192.61219787597656, |
| "logps/rejected": -1306.2027587890625, |
| "loss": 0.6047, |
| "rewards/accuracies": 0.6499999761581421, |
| "rewards/chosen": -0.36833977699279785, |
| "rewards/margins": 0.5396827459335327, |
| "rewards/rejected": -0.9080225229263306, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.88, |
| "learning_rate": 2.1464952759020856e-08, |
| "logits/chosen": 0.6576046943664551, |
| "logits/rejected": 1.7164825201034546, |
| "logps/chosen": -646.2034912109375, |
| "logps/pi_response": -518.1434326171875, |
| "logps/ref_response": -199.8957977294922, |
| "logps/rejected": -1313.4241943359375, |
| "loss": 0.587, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": -0.37260332703590393, |
| "rewards/margins": 0.556897759437561, |
| "rewards/rejected": -0.9295011758804321, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.94, |
| "learning_rate": 4.8708793644441086e-09, |
| "logits/chosen": 0.3645651936531067, |
| "logits/rejected": 0.9882078170776367, |
| "logps/chosen": -661.0691528320312, |
| "logps/pi_response": -497.58856201171875, |
| "logps/ref_response": -216.212158203125, |
| "logps/rejected": -1136.5726318359375, |
| "loss": 0.594, |
| "rewards/accuracies": 0.6000000238418579, |
| "rewards/chosen": -0.3775670826435089, |
| "rewards/margins": 0.37788960337638855, |
| "rewards/rejected": -0.7554566264152527, |
| "step": 150 |
| }, |
| { |
| "epoch": 1.0, |
| "step": 159, |
| "total_flos": 0.0, |
| "train_loss": 0.6132173088361632, |
| "train_runtime": 4607.0079, |
| "train_samples_per_second": 4.423, |
| "train_steps_per_second": 0.035 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 159, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 100, |
| "total_flos": 0.0, |
| "train_batch_size": 8, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|