Training in progress, epoch 0, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 3422229144
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a56b5df78edf5b3fb567bbf51cfe04d6b9650cbc6910e4b129e3fd4ded457bfc
|
| 3 |
size 3422229144
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1738272709
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:dd83f0b7b55b377877d04abc7e1db1bc6ad975a510498e854f134608cae3e1e1
|
| 3 |
size 1738272709
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14645
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:181c5f0270cf39930062ddfa3767a2481d0c360f120b11f8e25dbf533a1cdaba
|
| 3 |
size 14645
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1465
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ebcc2399037993f023f137c65edec3c249a67d9c697cb3b86a0c75a31475d419
|
| 3 |
size 1465
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,10 +1,10 @@
|
|
| 1 |
{
|
| 2 |
"best_global_step": null,
|
| 3 |
-
"best_metric": 0.
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch": 0.
|
| 6 |
"eval_steps": 50,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -174,6 +174,172 @@
|
|
| 174 |
"eval_samples_per_second": 11.253,
|
| 175 |
"eval_steps_per_second": 0.719,
|
| 176 |
"step": 50
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 177 |
}
|
| 178 |
],
|
| 179 |
"logging_steps": 5,
|
|
|
|
| 1 |
{
|
| 2 |
"best_global_step": null,
|
| 3 |
+
"best_metric": 0.3611069321632385,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 0.9779951100244498,
|
| 6 |
"eval_steps": 50,
|
| 7 |
+
"global_step": 100,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 174 |
"eval_samples_per_second": 11.253,
|
| 175 |
"eval_steps_per_second": 0.719,
|
| 176 |
"step": 50
|
| 177 |
+
},
|
| 178 |
+
{
|
| 179 |
+
"epoch": 0.5378973105134475,
|
| 180 |
+
"grad_norm": 0.0003851531946565956,
|
| 181 |
+
"learning_rate": 8.442723263480497e-06,
|
| 182 |
+
"logits/chosen": NaN,
|
| 183 |
+
"logits/rejected": 0.974880039691925,
|
| 184 |
+
"logps/chosen": -64.03724670410156,
|
| 185 |
+
"logps/rejected": -108.10626220703125,
|
| 186 |
+
"loss": 0.3436,
|
| 187 |
+
"rewards/accuracies": 0.8125,
|
| 188 |
+
"rewards/chosen": 68.7229995727539,
|
| 189 |
+
"rewards/margins": 85.6202621459961,
|
| 190 |
+
"rewards/rejected": -16.897260665893555,
|
| 191 |
+
"step": 55
|
| 192 |
+
},
|
| 193 |
+
{
|
| 194 |
+
"epoch": 0.58679706601467,
|
| 195 |
+
"grad_norm": 0.0005342594813555479,
|
| 196 |
+
"learning_rate": 9.22445689898795e-06,
|
| 197 |
+
"logits/chosen": NaN,
|
| 198 |
+
"logits/rejected": 0.6203755140304565,
|
| 199 |
+
"logps/chosen": -81.26509857177734,
|
| 200 |
+
"logps/rejected": -104.5843276977539,
|
| 201 |
+
"loss": 0.3086,
|
| 202 |
+
"rewards/accuracies": 0.8843749761581421,
|
| 203 |
+
"rewards/chosen": 69.86774444580078,
|
| 204 |
+
"rewards/margins": 82.07366180419922,
|
| 205 |
+
"rewards/rejected": -12.205923080444336,
|
| 206 |
+
"step": 60
|
| 207 |
+
},
|
| 208 |
+
{
|
| 209 |
+
"epoch": 0.6356968215158925,
|
| 210 |
+
"grad_norm": 4.174908845282904e-13,
|
| 211 |
+
"learning_rate": 1.0006190534495405e-05,
|
| 212 |
+
"logits/chosen": NaN,
|
| 213 |
+
"logits/rejected": 0.4278396666049957,
|
| 214 |
+
"logps/chosen": -39.61051940917969,
|
| 215 |
+
"logps/rejected": -92.36933898925781,
|
| 216 |
+
"loss": 0.2063,
|
| 217 |
+
"rewards/accuracies": 0.745312511920929,
|
| 218 |
+
"rewards/chosen": 72.73802947998047,
|
| 219 |
+
"rewards/margins": 85.91956329345703,
|
| 220 |
+
"rewards/rejected": -13.18153190612793,
|
| 221 |
+
"step": 65
|
| 222 |
+
},
|
| 223 |
+
{
|
| 224 |
+
"epoch": 0.684596577017115,
|
| 225 |
+
"grad_norm": 0.002292018150910735,
|
| 226 |
+
"learning_rate": 1.0787924170002858e-05,
|
| 227 |
+
"logits/chosen": NaN,
|
| 228 |
+
"logits/rejected": 0.28609800338745117,
|
| 229 |
+
"logps/chosen": -33.6094856262207,
|
| 230 |
+
"logps/rejected": -84.01991271972656,
|
| 231 |
+
"loss": 0.1113,
|
| 232 |
+
"rewards/accuracies": 0.859375,
|
| 233 |
+
"rewards/chosen": 91.45848083496094,
|
| 234 |
+
"rewards/margins": 93.42339324951172,
|
| 235 |
+
"rewards/rejected": -1.96491277217865,
|
| 236 |
+
"step": 70
|
| 237 |
+
},
|
| 238 |
+
{
|
| 239 |
+
"epoch": 0.7334963325183375,
|
| 240 |
+
"grad_norm": 0.6431168913841248,
|
| 241 |
+
"learning_rate": 1.156965780551031e-05,
|
| 242 |
+
"logits/chosen": NaN,
|
| 243 |
+
"logits/rejected": 0.2203584611415863,
|
| 244 |
+
"logps/chosen": -40.122108459472656,
|
| 245 |
+
"logps/rejected": -93.89823150634766,
|
| 246 |
+
"loss": 0.1585,
|
| 247 |
+
"rewards/accuracies": 0.7718750238418579,
|
| 248 |
+
"rewards/chosen": 74.9581069946289,
|
| 249 |
+
"rewards/margins": 88.4476318359375,
|
| 250 |
+
"rewards/rejected": -13.489527702331543,
|
| 251 |
+
"step": 75
|
| 252 |
+
},
|
| 253 |
+
{
|
| 254 |
+
"epoch": 0.78239608801956,
|
| 255 |
+
"grad_norm": 18.794679641723633,
|
| 256 |
+
"learning_rate": 1.2351391441017764e-05,
|
| 257 |
+
"logits/chosen": NaN,
|
| 258 |
+
"logits/rejected": 0.35011741518974304,
|
| 259 |
+
"logps/chosen": -43.437705993652344,
|
| 260 |
+
"logps/rejected": -116.1953353881836,
|
| 261 |
+
"loss": 0.1879,
|
| 262 |
+
"rewards/accuracies": 0.78125,
|
| 263 |
+
"rewards/chosen": 74.13380432128906,
|
| 264 |
+
"rewards/margins": 96.4022445678711,
|
| 265 |
+
"rewards/rejected": -22.268436431884766,
|
| 266 |
+
"step": 80
|
| 267 |
+
},
|
| 268 |
+
{
|
| 269 |
+
"epoch": 0.8312958435207825,
|
| 270 |
+
"grad_norm": 53.7838020324707,
|
| 271 |
+
"learning_rate": 1.3133125076525218e-05,
|
| 272 |
+
"logits/chosen": NaN,
|
| 273 |
+
"logits/rejected": 0.5478571653366089,
|
| 274 |
+
"logps/chosen": -37.990631103515625,
|
| 275 |
+
"logps/rejected": -131.7003173828125,
|
| 276 |
+
"loss": 0.2416,
|
| 277 |
+
"rewards/accuracies": 0.815625011920929,
|
| 278 |
+
"rewards/chosen": 83.08360290527344,
|
| 279 |
+
"rewards/margins": 111.3319320678711,
|
| 280 |
+
"rewards/rejected": -28.248327255249023,
|
| 281 |
+
"step": 85
|
| 282 |
+
},
|
| 283 |
+
{
|
| 284 |
+
"epoch": 0.8801955990220048,
|
| 285 |
+
"grad_norm": 33.947601318359375,
|
| 286 |
+
"learning_rate": 1.3914858712032673e-05,
|
| 287 |
+
"logits/chosen": NaN,
|
| 288 |
+
"logits/rejected": 0.736972451210022,
|
| 289 |
+
"logps/chosen": -23.155391693115234,
|
| 290 |
+
"logps/rejected": -108.7579116821289,
|
| 291 |
+
"loss": 0.4442,
|
| 292 |
+
"rewards/accuracies": 0.676562488079071,
|
| 293 |
+
"rewards/chosen": 70.76764678955078,
|
| 294 |
+
"rewards/margins": 92.23793029785156,
|
| 295 |
+
"rewards/rejected": -21.47028923034668,
|
| 296 |
+
"step": 90
|
| 297 |
+
},
|
| 298 |
+
{
|
| 299 |
+
"epoch": 0.9290953545232273,
|
| 300 |
+
"grad_norm": 9.191290306978517e-13,
|
| 301 |
+
"learning_rate": 1.4696592347540126e-05,
|
| 302 |
+
"logits/chosen": NaN,
|
| 303 |
+
"logits/rejected": 0.5948934555053711,
|
| 304 |
+
"logps/chosen": -22.43905258178711,
|
| 305 |
+
"logps/rejected": -86.31062316894531,
|
| 306 |
+
"loss": 0.1939,
|
| 307 |
+
"rewards/accuracies": 0.7203124761581421,
|
| 308 |
+
"rewards/chosen": 75.99381256103516,
|
| 309 |
+
"rewards/margins": 85.18391418457031,
|
| 310 |
+
"rewards/rejected": -9.190110206604004,
|
| 311 |
+
"step": 95
|
| 312 |
+
},
|
| 313 |
+
{
|
| 314 |
+
"epoch": 0.9779951100244498,
|
| 315 |
+
"grad_norm": 2.054981402058176e-12,
|
| 316 |
+
"learning_rate": 1.547832598304758e-05,
|
| 317 |
+
"logits/chosen": NaN,
|
| 318 |
+
"logits/rejected": 0.5537346005439758,
|
| 319 |
+
"logps/chosen": -28.839187622070312,
|
| 320 |
+
"logps/rejected": -72.35997009277344,
|
| 321 |
+
"loss": 0.2644,
|
| 322 |
+
"rewards/accuracies": 0.6937500238418579,
|
| 323 |
+
"rewards/chosen": 70.51815795898438,
|
| 324 |
+
"rewards/margins": 75.06769561767578,
|
| 325 |
+
"rewards/rejected": -4.549544334411621,
|
| 326 |
+
"step": 100
|
| 327 |
+
},
|
| 328 |
+
{
|
| 329 |
+
"epoch": 0.9779951100244498,
|
| 330 |
+
"eval_logits/chosen": NaN,
|
| 331 |
+
"eval_logits/rejected": -0.4462790787220001,
|
| 332 |
+
"eval_logps/chosen": -201.78216552734375,
|
| 333 |
+
"eval_logps/rejected": -73.33938598632812,
|
| 334 |
+
"eval_loss": 0.3611069321632385,
|
| 335 |
+
"eval_rewards/accuracies": 0.7414772510528564,
|
| 336 |
+
"eval_rewards/chosen": 1.3431318998336792,
|
| 337 |
+
"eval_rewards/margins": 1.3155231475830078,
|
| 338 |
+
"eval_rewards/rejected": 0.027608675882220268,
|
| 339 |
+
"eval_runtime": 61.2942,
|
| 340 |
+
"eval_samples_per_second": 11.241,
|
| 341 |
+
"eval_steps_per_second": 0.718,
|
| 342 |
+
"step": 100
|
| 343 |
}
|
| 344 |
],
|
| 345 |
"logging_steps": 5,
|