| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.9945, |
| "eval_steps": 500, |
| "global_step": 153, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.01, |
| "learning_rate": 3.125e-08, |
| "logits/chosen": -2.004561424255371, |
| "logits/rejected": -2.004561424255371, |
| "logps/chosen": -230.59202575683594, |
| "logps/rejected": -230.59202575683594, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.0, |
| "rewards/chosen": 0.0, |
| "rewards/margins": 0.0, |
| "rewards/rejected": 0.0, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 3.1249999999999997e-07, |
| "logits/chosen": -1.9280829429626465, |
| "logits/rejected": -1.9280829429626465, |
| "logps/chosen": -184.71426391601562, |
| "logps/rejected": -184.71426391601562, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.15811966359615326, |
| "rewards/chosen": -0.0006816095556132495, |
| "rewards/margins": 6.696617482759848e-09, |
| "rewards/rejected": -0.0006816161912865937, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 4.989490450759331e-07, |
| "logits/chosen": -1.9418092966079712, |
| "logits/rejected": -1.9418092966079712, |
| "logps/chosen": -197.00942993164062, |
| "logps/rejected": -197.00942993164062, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.15000000596046448, |
| "rewards/chosen": -0.02568601444363594, |
| "rewards/margins": -1.1841985170235603e-08, |
| "rewards/rejected": -0.025686005130410194, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 4.872270441827174e-07, |
| "logits/chosen": -1.768611192703247, |
| "logits/rejected": -1.768611192703247, |
| "logps/chosen": -218.05352783203125, |
| "logps/rejected": -218.0535125732422, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.13461539149284363, |
| "rewards/chosen": -0.2483261078596115, |
| "rewards/margins": -2.0081643015146255e-09, |
| "rewards/rejected": -0.24832607805728912, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 4.6308512113530063e-07, |
| "logits/chosen": -1.6662472486495972, |
| "logits/rejected": -1.6662472486495972, |
| "logps/chosen": -275.301513671875, |
| "logps/rejected": -275.301513671875, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.14615385234355927, |
| "rewards/chosen": -0.9216616153717041, |
| "rewards/margins": 1.6548885595213392e-09, |
| "rewards/rejected": -0.9216616153717041, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.33, |
| "learning_rate": 4.277872161641681e-07, |
| "logits/chosen": -1.4318017959594727, |
| "logits/rejected": -1.4318017959594727, |
| "logps/chosen": -325.6317443847656, |
| "logps/rejected": -325.6317443847656, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.15000000596046448, |
| "rewards/chosen": -1.4385263919830322, |
| "rewards/margins": 6.762834736662171e-09, |
| "rewards/rejected": -1.4385263919830322, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.39, |
| "learning_rate": 3.8318133624280046e-07, |
| "logits/chosen": -1.1741256713867188, |
| "logits/rejected": -1.1741256713867188, |
| "logps/chosen": -389.4248352050781, |
| "logps/rejected": -389.4248352050781, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.14230769872665405, |
| "rewards/chosen": -1.963220238685608, |
| "rewards/margins": 4.241099915702762e-09, |
| "rewards/rejected": -1.963220238685608, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.46, |
| "learning_rate": 3.316028034595861e-07, |
| "logits/chosen": -1.0726745128631592, |
| "logits/rejected": -1.0726745128631592, |
| "logps/chosen": -422.3594665527344, |
| "logps/rejected": -422.3594665527344, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.16538462042808533, |
| "rewards/chosen": -2.3579981327056885, |
| "rewards/margins": 4.58497284583359e-09, |
| "rewards/rejected": -2.3579981327056885, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.52, |
| "learning_rate": 2.7575199021178855e-07, |
| "logits/chosen": -0.9876527190208435, |
| "logits/rejected": -0.9876527190208435, |
| "logps/chosen": -491.9862365722656, |
| "logps/rejected": -491.9862365722656, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.13076923787593842, |
| "rewards/chosen": -3.068056344985962, |
| "rewards/margins": -3.2094809032656713e-09, |
| "rewards/rejected": -3.068056106567383, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.58, |
| "learning_rate": 2.1855294234408068e-07, |
| "logits/chosen": -0.7263882756233215, |
| "logits/rejected": -0.7263882756233215, |
| "logps/chosen": -615.2776489257812, |
| "logps/rejected": -615.2776489257812, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.13846154510974884, |
| "rewards/chosen": -4.217284202575684, |
| "rewards/margins": 2.063237758420655e-08, |
| "rewards/rejected": -4.217283725738525, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.65, |
| "learning_rate": 1.6300029195778453e-07, |
| "logits/chosen": -0.43152233958244324, |
| "logits/rejected": -0.43152233958244324, |
| "logps/chosen": -721.6929321289062, |
| "logps/rejected": -721.69287109375, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.1230769231915474, |
| "rewards/chosen": -5.2063374519348145, |
| "rewards/margins": -2.2007869304729866e-08, |
| "rewards/rejected": -5.2063374519348145, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.71, |
| "learning_rate": 1.1200247470632392e-07, |
| "logits/chosen": -0.6822031736373901, |
| "logits/rejected": -0.6822031736373901, |
| "logps/chosen": -729.0514526367188, |
| "logps/rejected": -729.0514526367188, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.1269230842590332, |
| "rewards/chosen": -5.433934688568115, |
| "rewards/margins": 9.169945358600273e-10, |
| "rewards/rejected": -5.433934688568115, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.78, |
| "learning_rate": 6.822945986946385e-08, |
| "logits/chosen": -0.5164610147476196, |
| "logits/rejected": -0.5164610147476196, |
| "logps/chosen": -775.7005004882812, |
| "logps/rejected": -775.7005004882812, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.1269230842590332, |
| "rewards/chosen": -5.883157253265381, |
| "rewards/margins": 4.7683716530855236e-08, |
| "rewards/rejected": -5.883157253265381, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.84, |
| "learning_rate": 3.397296523427806e-08, |
| "logits/chosen": -0.38993218541145325, |
| "logits/rejected": -0.38993218541145325, |
| "logps/chosen": -804.00830078125, |
| "logps/rejected": -804.0083618164062, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.13076923787593842, |
| "rewards/chosen": -6.021703720092773, |
| "rewards/margins": 1.9256885863683237e-08, |
| "rewards/rejected": -6.021703243255615, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.91, |
| "learning_rate": 1.1026475173977978e-08, |
| "logits/chosen": -0.47965455055236816, |
| "logits/rejected": -0.47965455055236816, |
| "logps/chosen": -805.8763427734375, |
| "logps/rejected": -805.8763427734375, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.1269230842590332, |
| "rewards/chosen": -6.223758220672607, |
| "rewards/margins": 2.6592841706474246e-08, |
| "rewards/rejected": -6.223758220672607, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.97, |
| "learning_rate": 5.913435276374834e-10, |
| "logits/chosen": -0.5421052575111389, |
| "logits/rejected": -0.5421052575111389, |
| "logps/chosen": -826.6334838867188, |
| "logps/rejected": -826.6334838867188, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.11923076957464218, |
| "rewards/chosen": -6.313894271850586, |
| "rewards/margins": 4.401573860945973e-08, |
| "rewards/rejected": -6.313894271850586, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.99, |
| "step": 153, |
| "total_flos": 0.0, |
| "train_loss": 0.0, |
| "train_runtime": 0.0097, |
| "train_samples_per_second": 2062857.002, |
| "train_steps_per_second": 15780.856 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 153, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 200, |
| "total_flos": 0.0, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|