File size: 11,139 Bytes
cb043d9 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 | {
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9999015651146766,
"eval_steps": 500,
"global_step": 5079,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.05000492174426617,
"grad_norm": 5.337889194488525,
"learning_rate": 5e-07,
"logits/chosen": -0.5339647531509399,
"logits/rejected": -0.47227513790130615,
"logps/chosen": -71.58138275146484,
"logps/rejected": -13.533102989196777,
"loss": 0.6865,
"rewards/accuracies": 0.5871062874794006,
"rewards/chosen": 0.012079809792339802,
"rewards/margins": 0.013962473720312119,
"rewards/rejected": -0.001882663695141673,
"step": 254
},
{
"epoch": 0.10000984348853234,
"grad_norm": 5.7681660652160645,
"learning_rate": 1e-06,
"logits/chosen": -0.524512529373169,
"logits/rejected": -0.470420241355896,
"logps/chosen": -68.49125671386719,
"logps/rejected": -14.996256828308105,
"loss": 0.5103,
"rewards/accuracies": 0.8449802994728088,
"rewards/chosen": 0.4529332220554352,
"rewards/margins": 0.5830409526824951,
"rewards/rejected": -0.13010773062705994,
"step": 508
},
{
"epoch": 0.1500147652327985,
"grad_norm": 1.7311837673187256,
"learning_rate": 9.44432290527237e-07,
"logits/chosen": -0.5406456589698792,
"logits/rejected": -0.49012085795402527,
"logps/chosen": -61.494964599609375,
"logps/rejected": -28.769521713256836,
"loss": 0.1423,
"rewards/accuracies": 0.9788385629653931,
"rewards/chosen": 1.3764965534210205,
"rewards/margins": 2.9073357582092285,
"rewards/rejected": -1.5308390855789185,
"step": 762
},
{
"epoch": 0.20001968697706468,
"grad_norm": 4.213954925537109,
"learning_rate": 8.888645810544738e-07,
"logits/chosen": -0.5104743838310242,
"logits/rejected": -0.43841081857681274,
"logps/chosen": -56.98832702636719,
"logps/rejected": -44.329437255859375,
"loss": 0.0364,
"rewards/accuracies": 0.9872047305107117,
"rewards/chosen": 1.6219738721847534,
"rewards/margins": 4.794076919555664,
"rewards/rejected": -3.1721031665802,
"step": 1016
},
{
"epoch": 0.25002460872133087,
"grad_norm": 0.14951969683170319,
"learning_rate": 8.332968715817108e-07,
"logits/chosen": -0.4608861804008484,
"logits/rejected": -0.3742350935935974,
"logps/chosen": -55.1967887878418,
"logps/rejected": -51.54916000366211,
"loss": 0.0258,
"rewards/accuracies": 0.9886810779571533,
"rewards/chosen": 1.6978679895401,
"rewards/margins": 5.571296691894531,
"rewards/rejected": -3.8734288215637207,
"step": 1270
},
{
"epoch": 0.300029530465597,
"grad_norm": 0.11377181112766266,
"learning_rate": 7.777291621089477e-07,
"logits/chosen": -0.4032284915447235,
"logits/rejected": -0.2789752185344696,
"logps/chosen": -57.66849899291992,
"logps/rejected": -57.404354095458984,
"loss": 0.0181,
"rewards/accuracies": 0.9936023354530334,
"rewards/chosen": 1.815442681312561,
"rewards/margins": 6.287877559661865,
"rewards/rejected": -4.472434043884277,
"step": 1524
},
{
"epoch": 0.3500344522098632,
"grad_norm": 61.85912322998047,
"learning_rate": 7.221614526361847e-07,
"logits/chosen": -0.39197683334350586,
"logits/rejected": -0.2711484432220459,
"logps/chosen": -56.51285934448242,
"logps/rejected": -66.6546630859375,
"loss": 0.0243,
"rewards/accuracies": 0.9906495809555054,
"rewards/chosen": 1.6953144073486328,
"rewards/margins": 6.998918533325195,
"rewards/rejected": -5.3036041259765625,
"step": 1778
},
{
"epoch": 0.40003937395412936,
"grad_norm": 0.09933885931968689,
"learning_rate": 6.665937431634215e-07,
"logits/chosen": -0.35676872730255127,
"logits/rejected": -0.21091562509536743,
"logps/chosen": -58.933048248291016,
"logps/rejected": -72.55093383789062,
"loss": 0.022,
"rewards/accuracies": 0.9906495809555054,
"rewards/chosen": 1.709058165550232,
"rewards/margins": 7.6529622077941895,
"rewards/rejected": -5.943903923034668,
"step": 2032
},
{
"epoch": 0.4500442956983955,
"grad_norm": 0.3078814446926117,
"learning_rate": 6.110260336906585e-07,
"logits/chosen": -0.3692930340766907,
"logits/rejected": -0.21181651949882507,
"logps/chosen": -56.33852767944336,
"logps/rejected": -78.77378845214844,
"loss": 0.0193,
"rewards/accuracies": 0.9916338324546814,
"rewards/chosen": 1.583296775817871,
"rewards/margins": 8.18080997467041,
"rewards/rejected": -6.597513675689697,
"step": 2286
},
{
"epoch": 0.5000492174426617,
"grad_norm": 0.026140812784433365,
"learning_rate": 5.554583242178954e-07,
"logits/chosen": -0.36246979236602783,
"logits/rejected": -0.21610520780086517,
"logps/chosen": -56.37531280517578,
"logps/rejected": -81.92805480957031,
"loss": 0.0298,
"rewards/accuracies": 0.9886810779571533,
"rewards/chosen": 1.608428955078125,
"rewards/margins": 8.545919418334961,
"rewards/rejected": -6.9374895095825195,
"step": 2540
},
{
"epoch": 0.5500541391869278,
"grad_norm": 0.0485980287194252,
"learning_rate": 4.998906147451324e-07,
"logits/chosen": -0.35004737973213196,
"logits/rejected": -0.1877668797969818,
"logps/chosen": -56.29869079589844,
"logps/rejected": -84.21609497070312,
"loss": 0.0229,
"rewards/accuracies": 0.9901574850082397,
"rewards/chosen": 1.6171692609786987,
"rewards/margins": 8.883115768432617,
"rewards/rejected": -7.265947341918945,
"step": 2794
},
{
"epoch": 0.600059060931194,
"grad_norm": 0.13149231672286987,
"learning_rate": 4.4432290527236927e-07,
"logits/chosen": -0.3316061198711395,
"logits/rejected": -0.17913725972175598,
"logps/chosen": -57.6456413269043,
"logps/rejected": -89.23247528076172,
"loss": 0.0144,
"rewards/accuracies": 0.9960629940032959,
"rewards/chosen": 1.622791051864624,
"rewards/margins": 9.201993942260742,
"rewards/rejected": -7.579202651977539,
"step": 3048
},
{
"epoch": 0.6500639826754602,
"grad_norm": 0.03225807845592499,
"learning_rate": 3.887551957996062e-07,
"logits/chosen": -0.3303147554397583,
"logits/rejected": -0.16745421290397644,
"logps/chosen": -57.618045806884766,
"logps/rejected": -90.39539337158203,
"loss": 0.0165,
"rewards/accuracies": 0.9921259880065918,
"rewards/chosen": 1.5876142978668213,
"rewards/margins": 9.376455307006836,
"rewards/rejected": -7.788840293884277,
"step": 3302
},
{
"epoch": 0.7000689044197264,
"grad_norm": 0.024763241410255432,
"learning_rate": 3.3318748632684314e-07,
"logits/chosen": -0.3256986141204834,
"logits/rejected": -0.1574079841375351,
"logps/chosen": -59.10237121582031,
"logps/rejected": -92.18179321289062,
"loss": 0.0203,
"rewards/accuracies": 0.9901574850082397,
"rewards/chosen": 1.5978204011917114,
"rewards/margins": 9.474949836730957,
"rewards/rejected": -7.877129554748535,
"step": 3556
},
{
"epoch": 0.7500738261639925,
"grad_norm": 0.18685077130794525,
"learning_rate": 2.7761977685408005e-07,
"logits/chosen": -0.31320706009864807,
"logits/rejected": -0.14784303307533264,
"logps/chosen": -58.15943908691406,
"logps/rejected": -92.56378936767578,
"loss": 0.0265,
"rewards/accuracies": 0.9906495809555054,
"rewards/chosen": 1.5292613506317139,
"rewards/margins": 9.509092330932617,
"rewards/rejected": -7.979831218719482,
"step": 3810
},
{
"epoch": 0.8000787479082587,
"grad_norm": 0.01488853245973587,
"learning_rate": 2.22052067381317e-07,
"logits/chosen": -0.3247720003128052,
"logits/rejected": -0.15560078620910645,
"logps/chosen": -56.86127471923828,
"logps/rejected": -93.76302337646484,
"loss": 0.0189,
"rewards/accuracies": 0.9931102395057678,
"rewards/chosen": 1.586428165435791,
"rewards/margins": 9.719764709472656,
"rewards/rejected": -8.133337020874023,
"step": 4064
},
{
"epoch": 0.8500836696525249,
"grad_norm": 4.113521575927734,
"learning_rate": 1.6648435790855392e-07,
"logits/chosen": -0.31977561116218567,
"logits/rejected": -0.164890855550766,
"logps/chosen": -56.98260498046875,
"logps/rejected": -95.06165313720703,
"loss": 0.0233,
"rewards/accuracies": 0.9921259880065918,
"rewards/chosen": 1.4957386255264282,
"rewards/margins": 9.688507080078125,
"rewards/rejected": -8.192767143249512,
"step": 4318
},
{
"epoch": 0.900088591396791,
"grad_norm": 0.0642678439617157,
"learning_rate": 1.1091664843579085e-07,
"logits/chosen": -0.3185438811779022,
"logits/rejected": -0.1604050248861313,
"logps/chosen": -57.956459045410156,
"logps/rejected": -96.31430053710938,
"loss": 0.0202,
"rewards/accuracies": 0.9916338324546814,
"rewards/chosen": 1.4874851703643799,
"rewards/margins": 9.749979019165039,
"rewards/rejected": -8.262493133544922,
"step": 4572
},
{
"epoch": 0.9500935131410572,
"grad_norm": 0.0038960117381066084,
"learning_rate": 5.534893896302778e-08,
"logits/chosen": -0.3140643537044525,
"logits/rejected": -0.1565851867198944,
"logps/chosen": -59.727909088134766,
"logps/rejected": -95.10466003417969,
"loss": 0.0173,
"rewards/accuracies": 0.9936023354530334,
"rewards/chosen": 1.604878544807434,
"rewards/margins": 9.842850685119629,
"rewards/rejected": -8.237971305847168,
"step": 4826
},
{
"epoch": 0.9999015651146766,
"step": 5079,
"total_flos": 3.074560994106409e+18,
"train_loss": 0.08538520530960554,
"train_runtime": 75713.4377,
"train_samples_per_second": 0.537,
"train_steps_per_second": 0.067
}
],
"logging_steps": 254,
"max_steps": 5079,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 3.074560994106409e+18,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}
|