Training in progress, epoch 4, checkpoint
Browse files
last-checkpoint/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1227009528
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:43744e9d3a7df899c77712de3afb6af1a054747752266c81e6c564a6bbdfc9fc
|
| 3 |
size 1227009528
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 2454133690
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9dc38bc58189826542c01bb812237dd78de2565f1b21ebc12593e6867e65ffec
|
| 3 |
size 2454133690
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bd03df90c7c1260e5c9a0b8fad9ec21a69a6cc6367e61c044d90f7a2513787fb
|
| 3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3e03a74488d48b3a98579050f742070bcb62d3183a7aab3987e0d0c9c802d894
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
-
"best_metric":
|
| 3 |
-
"best_model_checkpoint": "/kaggle/working/output/checkpoint-
|
| 4 |
-
"epoch":
|
| 5 |
"eval_steps": 500,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -304,6 +304,105 @@
|
|
| 304 |
"eval_samples_per_second": 26.458,
|
| 305 |
"eval_steps_per_second": 3.325,
|
| 306 |
"step": 3915
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 307 |
}
|
| 308 |
],
|
| 309 |
"logging_steps": 100,
|
|
@@ -332,7 +431,7 @@
|
|
| 332 |
"attributes": {}
|
| 333 |
}
|
| 334 |
},
|
| 335 |
-
"total_flos":
|
| 336 |
"train_batch_size": 8,
|
| 337 |
"trial_name": null,
|
| 338 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
+
"best_metric": 34.954986572265625,
|
| 3 |
+
"best_model_checkpoint": "/kaggle/working/output/checkpoint-5220",
|
| 4 |
+
"epoch": 4.0,
|
| 5 |
"eval_steps": 500,
|
| 6 |
+
"global_step": 5220,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 304 |
"eval_samples_per_second": 26.458,
|
| 305 |
"eval_steps_per_second": 3.325,
|
| 306 |
"step": 3915
|
| 307 |
+
},
|
| 308 |
+
{
|
| 309 |
+
"epoch": 3.0651340996168583,
|
| 310 |
+
"grad_norm": 2.8833682537078857,
|
| 311 |
+
"learning_rate": 4.808477011494253e-05,
|
| 312 |
+
"loss": 34.523,
|
| 313 |
+
"step": 4000
|
| 314 |
+
},
|
| 315 |
+
{
|
| 316 |
+
"epoch": 3.1417624521072796,
|
| 317 |
+
"grad_norm": 2.8744261264801025,
|
| 318 |
+
"learning_rate": 4.803735632183908e-05,
|
| 319 |
+
"loss": 33.921,
|
| 320 |
+
"step": 4100
|
| 321 |
+
},
|
| 322 |
+
{
|
| 323 |
+
"epoch": 3.218390804597701,
|
| 324 |
+
"grad_norm": 2.928616762161255,
|
| 325 |
+
"learning_rate": 4.798946360153257e-05,
|
| 326 |
+
"loss": 33.6903,
|
| 327 |
+
"step": 4200
|
| 328 |
+
},
|
| 329 |
+
{
|
| 330 |
+
"epoch": 3.2950191570881224,
|
| 331 |
+
"grad_norm": 3.0579280853271484,
|
| 332 |
+
"learning_rate": 4.7941570881226054e-05,
|
| 333 |
+
"loss": 33.0608,
|
| 334 |
+
"step": 4300
|
| 335 |
+
},
|
| 336 |
+
{
|
| 337 |
+
"epoch": 3.371647509578544,
|
| 338 |
+
"grad_norm": 1.6688510179519653,
|
| 339 |
+
"learning_rate": 4.789367816091954e-05,
|
| 340 |
+
"loss": 33.8769,
|
| 341 |
+
"step": 4400
|
| 342 |
+
},
|
| 343 |
+
{
|
| 344 |
+
"epoch": 3.4482758620689653,
|
| 345 |
+
"grad_norm": 2.6190459728240967,
|
| 346 |
+
"learning_rate": 4.784578544061303e-05,
|
| 347 |
+
"loss": 33.2974,
|
| 348 |
+
"step": 4500
|
| 349 |
+
},
|
| 350 |
+
{
|
| 351 |
+
"epoch": 3.524904214559387,
|
| 352 |
+
"grad_norm": 2.6260671615600586,
|
| 353 |
+
"learning_rate": 4.7797892720306515e-05,
|
| 354 |
+
"loss": 34.0589,
|
| 355 |
+
"step": 4600
|
| 356 |
+
},
|
| 357 |
+
{
|
| 358 |
+
"epoch": 3.6015325670498086,
|
| 359 |
+
"grad_norm": 3.191978693008423,
|
| 360 |
+
"learning_rate": 4.775e-05,
|
| 361 |
+
"loss": 33.9493,
|
| 362 |
+
"step": 4700
|
| 363 |
+
},
|
| 364 |
+
{
|
| 365 |
+
"epoch": 3.67816091954023,
|
| 366 |
+
"grad_norm": 2.759941339492798,
|
| 367 |
+
"learning_rate": 4.770210727969349e-05,
|
| 368 |
+
"loss": 33.5936,
|
| 369 |
+
"step": 4800
|
| 370 |
+
},
|
| 371 |
+
{
|
| 372 |
+
"epoch": 3.7547892720306515,
|
| 373 |
+
"grad_norm": 2.262294054031372,
|
| 374 |
+
"learning_rate": 4.7654214559386976e-05,
|
| 375 |
+
"loss": 34.06,
|
| 376 |
+
"step": 4900
|
| 377 |
+
},
|
| 378 |
+
{
|
| 379 |
+
"epoch": 3.8314176245210727,
|
| 380 |
+
"grad_norm": 4.6808600425720215,
|
| 381 |
+
"learning_rate": 4.760632183908046e-05,
|
| 382 |
+
"loss": 34.1592,
|
| 383 |
+
"step": 5000
|
| 384 |
+
},
|
| 385 |
+
{
|
| 386 |
+
"epoch": 3.9080459770114944,
|
| 387 |
+
"grad_norm": 4.294464111328125,
|
| 388 |
+
"learning_rate": 4.755842911877395e-05,
|
| 389 |
+
"loss": 34.4652,
|
| 390 |
+
"step": 5100
|
| 391 |
+
},
|
| 392 |
+
{
|
| 393 |
+
"epoch": 3.9846743295019156,
|
| 394 |
+
"grad_norm": 2.7845072746276855,
|
| 395 |
+
"learning_rate": 4.7510536398467436e-05,
|
| 396 |
+
"loss": 34.2075,
|
| 397 |
+
"step": 5200
|
| 398 |
+
},
|
| 399 |
+
{
|
| 400 |
+
"epoch": 4.0,
|
| 401 |
+
"eval_loss": 34.954986572265625,
|
| 402 |
+
"eval_runtime": 49.2865,
|
| 403 |
+
"eval_samples_per_second": 26.478,
|
| 404 |
+
"eval_steps_per_second": 3.327,
|
| 405 |
+
"step": 5220
|
| 406 |
}
|
| 407 |
],
|
| 408 |
"logging_steps": 100,
|
|
|
|
| 431 |
"attributes": {}
|
| 432 |
}
|
| 433 |
},
|
| 434 |
+
"total_flos": 5629243612188672.0,
|
| 435 |
"train_batch_size": 8,
|
| 436 |
"trial_name": null,
|
| 437 |
"trial_params": null
|