File size: 8,962 Bytes
87c9ccd | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 | {
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9945,
"eval_steps": 500,
"global_step": 153,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01,
"grad_norm": 17.363519218688417,
"learning_rate": 3.125e-08,
"logits/chosen": -2.205641031265259,
"logits/rejected": -2.2929024696350098,
"logps/chosen": -215.50050354003906,
"logps/rejected": -237.99966430664062,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1
},
{
"epoch": 0.07,
"grad_norm": 18.34878510832685,
"learning_rate": 3.1249999999999997e-07,
"logits/chosen": -2.382091522216797,
"logits/rejected": -2.295259952545166,
"logps/chosen": -222.111328125,
"logps/rejected": -210.6314697265625,
"loss": 0.693,
"rewards/accuracies": 0.4615384638309479,
"rewards/chosen": 0.0012403662549331784,
"rewards/margins": 0.0005746442475356162,
"rewards/rejected": 0.0006657222402282059,
"step": 10
},
{
"epoch": 0.13,
"grad_norm": 18.337158741008302,
"learning_rate": 4.989490450759331e-07,
"logits/chosen": -2.383176803588867,
"logits/rejected": -2.422689199447632,
"logps/chosen": -212.94821166992188,
"logps/rejected": -232.4334259033203,
"loss": 0.6895,
"rewards/accuracies": 0.5384615659713745,
"rewards/chosen": -0.08582816272974014,
"rewards/margins": 0.0013778842985630035,
"rewards/rejected": -0.08720605075359344,
"step": 20
},
{
"epoch": 0.2,
"grad_norm": 22.542054212888594,
"learning_rate": 4.872270441827174e-07,
"logits/chosen": -2.3914377689361572,
"logits/rejected": -2.3520281314849854,
"logps/chosen": -215.9875030517578,
"logps/rejected": -227.70399475097656,
"loss": 0.6861,
"rewards/accuracies": 0.5384615659713745,
"rewards/chosen": -0.25440388917922974,
"rewards/margins": 0.03694874048233032,
"rewards/rejected": -0.29135259985923767,
"step": 30
},
{
"epoch": 0.26,
"grad_norm": 18.6076942310058,
"learning_rate": 4.6308512113530063e-07,
"logits/chosen": -2.4768216609954834,
"logits/rejected": -2.4233009815216064,
"logps/chosen": -229.41555786132812,
"logps/rejected": -242.26214599609375,
"loss": 0.6925,
"rewards/accuracies": 0.5384615659713745,
"rewards/chosen": -0.19061818718910217,
"rewards/margins": 0.01797662116587162,
"rewards/rejected": -0.20859479904174805,
"step": 40
},
{
"epoch": 0.33,
"grad_norm": 15.540618809394221,
"learning_rate": 4.277872161641681e-07,
"logits/chosen": -2.5903377532958984,
"logits/rejected": -2.5592682361602783,
"logps/chosen": -225.72836303710938,
"logps/rejected": -240.36595153808594,
"loss": 0.6897,
"rewards/accuracies": 0.5307692289352417,
"rewards/chosen": -0.033847782760858536,
"rewards/margins": 0.001607205718755722,
"rewards/rejected": -0.03545498102903366,
"step": 50
},
{
"epoch": 0.39,
"grad_norm": 24.02478451571149,
"learning_rate": 3.8318133624280046e-07,
"logits/chosen": -2.6142632961273193,
"logits/rejected": -2.6310534477233887,
"logps/chosen": -223.6808624267578,
"logps/rejected": -255.2255401611328,
"loss": 0.6903,
"rewards/accuracies": 0.5423076748847961,
"rewards/chosen": -0.10960451513528824,
"rewards/margins": 0.02808019518852234,
"rewards/rejected": -0.13768470287322998,
"step": 60
},
{
"epoch": 0.46,
"grad_norm": 20.288409772533704,
"learning_rate": 3.316028034595861e-07,
"logits/chosen": -2.5127460956573486,
"logits/rejected": -2.5037307739257812,
"logps/chosen": -230.75833129882812,
"logps/rejected": -256.0094909667969,
"loss": 0.6832,
"rewards/accuracies": 0.4961538314819336,
"rewards/chosen": -0.2965443730354309,
"rewards/margins": 0.03154058754444122,
"rewards/rejected": -0.3280849754810333,
"step": 70
},
{
"epoch": 0.52,
"grad_norm": 22.680428500041128,
"learning_rate": 2.7575199021178855e-07,
"logits/chosen": -2.5435211658477783,
"logits/rejected": -2.5064070224761963,
"logps/chosen": -251.8400421142578,
"logps/rejected": -273.2138671875,
"loss": 0.6778,
"rewards/accuracies": 0.5538461804389954,
"rewards/chosen": -0.28813430666923523,
"rewards/margins": 0.06685086339712143,
"rewards/rejected": -0.3549851179122925,
"step": 80
},
{
"epoch": 0.58,
"grad_norm": 18.86897478499211,
"learning_rate": 2.1855294234408068e-07,
"logits/chosen": -2.5077600479125977,
"logits/rejected": -2.503957986831665,
"logps/chosen": -251.26556396484375,
"logps/rejected": -242.37310791015625,
"loss": 0.6842,
"rewards/accuracies": 0.5307692289352417,
"rewards/chosen": -0.24721869826316833,
"rewards/margins": 0.009251880459487438,
"rewards/rejected": -0.25647059082984924,
"step": 90
},
{
"epoch": 0.65,
"grad_norm": 20.56846597020704,
"learning_rate": 1.6300029195778453e-07,
"logits/chosen": -2.492692708969116,
"logits/rejected": -2.3162038326263428,
"logps/chosen": -253.9385223388672,
"logps/rejected": -268.18414306640625,
"loss": 0.6821,
"rewards/accuracies": 0.5538461804389954,
"rewards/chosen": -0.4339679181575775,
"rewards/margins": 0.06273461133241653,
"rewards/rejected": -0.49670252203941345,
"step": 100
},
{
"epoch": 0.71,
"grad_norm": 29.852940409061166,
"learning_rate": 1.1200247470632392e-07,
"logits/chosen": -2.409569501876831,
"logits/rejected": -2.3867456912994385,
"logps/chosen": -297.3172607421875,
"logps/rejected": -285.6565246582031,
"loss": 0.6823,
"rewards/accuracies": 0.4923076927661896,
"rewards/chosen": -0.73952317237854,
"rewards/margins": 0.020068956539034843,
"rewards/rejected": -0.7595921754837036,
"step": 110
},
{
"epoch": 0.78,
"grad_norm": 20.239291104683563,
"learning_rate": 6.822945986946385e-08,
"logits/chosen": -2.3316874504089355,
"logits/rejected": -2.2675819396972656,
"logps/chosen": -275.67767333984375,
"logps/rejected": -291.9703674316406,
"loss": 0.6858,
"rewards/accuracies": 0.5384615659713745,
"rewards/chosen": -0.5500468611717224,
"rewards/margins": 0.06261468678712845,
"rewards/rejected": -0.6126615405082703,
"step": 120
},
{
"epoch": 0.84,
"grad_norm": 24.27835409910822,
"learning_rate": 3.397296523427806e-08,
"logits/chosen": -2.4436075687408447,
"logits/rejected": -2.309699296951294,
"logps/chosen": -233.41680908203125,
"logps/rejected": -262.0289611816406,
"loss": 0.6782,
"rewards/accuracies": 0.5807692408561707,
"rewards/chosen": -0.36947229504585266,
"rewards/margins": 0.0935312956571579,
"rewards/rejected": -0.46300360560417175,
"step": 130
},
{
"epoch": 0.91,
"grad_norm": 24.790097756991855,
"learning_rate": 1.1026475173977978e-08,
"logits/chosen": -2.3928282260894775,
"logits/rejected": -2.382559299468994,
"logps/chosen": -244.34410095214844,
"logps/rejected": -248.87876892089844,
"loss": 0.6769,
"rewards/accuracies": 0.5615384578704834,
"rewards/chosen": -0.27959996461868286,
"rewards/margins": 0.04778864234685898,
"rewards/rejected": -0.32738858461380005,
"step": 140
},
{
"epoch": 0.97,
"grad_norm": 24.029709323211232,
"learning_rate": 5.913435276374834e-10,
"logits/chosen": -2.4940154552459717,
"logits/rejected": -2.3921005725860596,
"logps/chosen": -241.1715545654297,
"logps/rejected": -262.944580078125,
"loss": 0.6737,
"rewards/accuracies": 0.6192307472229004,
"rewards/chosen": -0.2436859905719757,
"rewards/margins": 0.09733694791793823,
"rewards/rejected": -0.34102290868759155,
"step": 150
},
{
"epoch": 0.99,
"step": 153,
"total_flos": 0.0,
"train_loss": 0.6839175197034101,
"train_runtime": 39806.5992,
"train_samples_per_second": 0.502,
"train_steps_per_second": 0.004
}
],
"logging_steps": 10,
"max_steps": 153,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 100,
"total_flos": 0.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}
|