File size: 8,898 Bytes
242682b | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 | {
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 100,
"global_step": 155,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01,
"learning_rate": 3.125e-07,
"logits/chosen": -2.8946032524108887,
"logits/rejected": -2.8606104850769043,
"logps/chosen": -149.65138244628906,
"logps/rejected": -147.93389892578125,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1
},
{
"epoch": 0.06,
"learning_rate": 3.125e-06,
"logits/chosen": -2.7723653316497803,
"logits/rejected": -2.7413721084594727,
"logps/chosen": -127.34146881103516,
"logps/rejected": -128.1923065185547,
"loss": 0.693,
"rewards/accuracies": 0.46875,
"rewards/chosen": 0.0019323664018884301,
"rewards/margins": 0.0003104716306552291,
"rewards/rejected": 0.001621894771233201,
"step": 10
},
{
"epoch": 0.13,
"learning_rate": 4.989790503518888e-06,
"logits/chosen": -2.78098464012146,
"logits/rejected": -2.7467544078826904,
"logps/chosen": -123.1203384399414,
"logps/rejected": -123.0350341796875,
"loss": 0.6922,
"rewards/accuracies": 0.518750011920929,
"rewards/chosen": 0.028792519122362137,
"rewards/margins": 0.0023718727752566338,
"rewards/rejected": 0.026420647278428078,
"step": 20
},
{
"epoch": 0.19,
"learning_rate": 4.8758891663695165e-06,
"logits/chosen": -2.7641711235046387,
"logits/rejected": -2.7216341495513916,
"logps/chosen": -122.10086822509766,
"logps/rejected": -121.503662109375,
"loss": 0.6897,
"rewards/accuracies": 0.565625011920929,
"rewards/chosen": 0.07782017439603806,
"rewards/margins": 0.005080958362668753,
"rewards/rejected": 0.07273921370506287,
"step": 30
},
{
"epoch": 0.26,
"learning_rate": 4.64113778721764e-06,
"logits/chosen": -2.7458434104919434,
"logits/rejected": -2.717912197113037,
"logps/chosen": -109.0793228149414,
"logps/rejected": -111.13133239746094,
"loss": 0.6888,
"rewards/accuracies": 0.5625,
"rewards/chosen": 0.0947461873292923,
"rewards/margins": 0.010546943172812462,
"rewards/rejected": 0.08419923484325409,
"step": 40
},
{
"epoch": 0.32,
"learning_rate": 4.297477038394368e-06,
"logits/chosen": -2.7164647579193115,
"logits/rejected": -2.6716389656066895,
"logps/chosen": -124.75947570800781,
"logps/rejected": -124.14982604980469,
"loss": 0.6863,
"rewards/accuracies": 0.581250011920929,
"rewards/chosen": 0.07808887958526611,
"rewards/margins": 0.012092621996998787,
"rewards/rejected": 0.06599625200033188,
"step": 50
},
{
"epoch": 0.39,
"learning_rate": 3.862387287468095e-06,
"logits/chosen": -2.7368290424346924,
"logits/rejected": -2.7067949771881104,
"logps/chosen": -122.44651794433594,
"logps/rejected": -123.14102935791016,
"loss": 0.6814,
"rewards/accuracies": 0.59375,
"rewards/chosen": 0.04377375915646553,
"rewards/margins": 0.0176816638559103,
"rewards/rejected": 0.02609209157526493,
"step": 60
},
{
"epoch": 0.45,
"learning_rate": 3.357999455114148e-06,
"logits/chosen": -2.6553173065185547,
"logits/rejected": -2.6178958415985107,
"logps/chosen": -131.94534301757812,
"logps/rejected": -128.55323791503906,
"loss": 0.6783,
"rewards/accuracies": 0.628125011920929,
"rewards/chosen": 0.01449825894087553,
"rewards/margins": 0.04684123024344444,
"rewards/rejected": -0.03234297037124634,
"step": 70
},
{
"epoch": 0.52,
"learning_rate": 2.8099693217402807e-06,
"logits/chosen": -2.680434226989746,
"logits/rejected": -2.652169704437256,
"logps/chosen": -135.7716522216797,
"logps/rejected": -137.4171142578125,
"loss": 0.6796,
"rewards/accuracies": 0.628125011920929,
"rewards/chosen": -0.02753743901848793,
"rewards/margins": 0.03442533686757088,
"rewards/rejected": -0.06196277216076851,
"step": 80
},
{
"epoch": 0.58,
"learning_rate": 2.24617254146973e-06,
"logits/chosen": -2.683811664581299,
"logits/rejected": -2.6760880947113037,
"logps/chosen": -109.57649230957031,
"logps/rejected": -118.2197265625,
"loss": 0.6791,
"rewards/accuracies": 0.5687500238418579,
"rewards/chosen": 0.08844764530658722,
"rewards/margins": 0.03314244747161865,
"rewards/rejected": 0.055305201560258865,
"step": 90
},
{
"epoch": 0.65,
"learning_rate": 1.6952867418370707e-06,
"logits/chosen": -2.662198543548584,
"logits/rejected": -2.6310229301452637,
"logps/chosen": -123.90461730957031,
"logps/rejected": -129.31741333007812,
"loss": 0.6763,
"rewards/accuracies": 0.5874999761581421,
"rewards/chosen": 0.07869021594524384,
"rewards/margins": 0.050858981907367706,
"rewards/rejected": 0.027831237763166428,
"step": 100
},
{
"epoch": 0.65,
"eval_logits/chosen": -2.662461042404175,
"eval_logits/rejected": -2.5754306316375732,
"eval_logps/chosen": -289.1737365722656,
"eval_logps/rejected": -269.4944763183594,
"eval_loss": 0.6652680039405823,
"eval_rewards/accuracies": 0.6399999856948853,
"eval_rewards/chosen": -0.02158256620168686,
"eval_rewards/margins": 0.060695480555295944,
"eval_rewards/rejected": -0.0822780430316925,
"eval_runtime": 382.0664,
"eval_samples_per_second": 5.235,
"eval_steps_per_second": 0.654,
"step": 100
},
{
"epoch": 0.71,
"learning_rate": 1.1853328309581139e-06,
"logits/chosen": -2.6986496448516846,
"logits/rejected": -2.6445116996765137,
"logps/chosen": -126.5402603149414,
"logps/rejected": -126.73878479003906,
"loss": 0.6724,
"rewards/accuracies": 0.6031249761581421,
"rewards/chosen": 0.008479948155581951,
"rewards/margins": 0.04984438419342041,
"rewards/rejected": -0.04136443883180618,
"step": 110
},
{
"epoch": 0.77,
"learning_rate": 7.422497088545436e-07,
"logits/chosen": -2.6892261505126953,
"logits/rejected": -2.61507511138916,
"logps/chosen": -121.74040222167969,
"logps/rejected": -122.19820404052734,
"loss": 0.6739,
"rewards/accuracies": 0.5531250238418579,
"rewards/chosen": -0.008245681412518024,
"rewards/margins": 0.041609544306993484,
"rewards/rejected": -0.049855224788188934,
"step": 120
},
{
"epoch": 0.84,
"learning_rate": 3.8857488050544903e-07,
"logits/chosen": -2.6735100746154785,
"logits/rejected": -2.6348578929901123,
"logps/chosen": -125.81143951416016,
"logps/rejected": -133.61953735351562,
"loss": 0.6736,
"rewards/accuracies": 0.581250011920929,
"rewards/chosen": 0.015428361482918262,
"rewards/margins": 0.057859934866428375,
"rewards/rejected": -0.04243157058954239,
"step": 130
},
{
"epoch": 0.9,
"learning_rate": 1.4229808148697732e-07,
"logits/chosen": -2.6265532970428467,
"logits/rejected": -2.633122205734253,
"logps/chosen": -131.5351104736328,
"logps/rejected": -136.5831756591797,
"loss": 0.6743,
"rewards/accuracies": 0.6031249761581421,
"rewards/chosen": 0.021531684324145317,
"rewards/margins": 0.03703152388334274,
"rewards/rejected": -0.015499832108616829,
"step": 140
},
{
"epoch": 0.97,
"learning_rate": 1.5946226744029402e-08,
"logits/chosen": -2.687284231185913,
"logits/rejected": -2.641932249069214,
"logps/chosen": -122.74555969238281,
"logps/rejected": -129.6962127685547,
"loss": 0.6756,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": 0.03600780665874481,
"rewards/margins": 0.05919266864657402,
"rewards/rejected": -0.023184867575764656,
"step": 150
},
{
"epoch": 1.0,
"step": 155,
"total_flos": 0.0,
"train_loss": 0.6804818130308582,
"train_runtime": 6451.0629,
"train_samples_per_second": 3.075,
"train_steps_per_second": 0.024
}
],
"logging_steps": 10,
"max_steps": 155,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 100,
"total_flos": 0.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}
|