Training in progress, epoch 6, checkpoint
Browse files
last-checkpoint/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1227009528
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:20c40f891a6ad2cd6bdb721e2f111292589ff390313316ee8f0d082edb0b9b03
|
| 3 |
size 1227009528
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 2454133690
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:97e7d057f22cb8197d547d1ed0d192390fd1e34fb36aa35aa6b76f03d0e2f9d9
|
| 3 |
size 2454133690
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fe7c95ba6d299e128ae454cc0731e509722836b2913c0cc0546da0aa648a6383
|
| 3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a099afb4d9a9c6bf9e5e93d59bc1aa866f860cc49e0492bfafa53bc834b220ce
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
-
"best_metric": 34.
|
| 3 |
-
"best_model_checkpoint": "/kaggle/working/output/checkpoint-
|
| 4 |
-
"epoch":
|
| 5 |
"eval_steps": 500,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -502,6 +502,105 @@
|
|
| 502 |
"eval_samples_per_second": 26.47,
|
| 503 |
"eval_steps_per_second": 3.327,
|
| 504 |
"step": 6525
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 505 |
}
|
| 506 |
],
|
| 507 |
"logging_steps": 100,
|
|
@@ -530,7 +629,7 @@
|
|
| 530 |
"attributes": {}
|
| 531 |
}
|
| 532 |
},
|
| 533 |
-
"total_flos":
|
| 534 |
"train_batch_size": 8,
|
| 535 |
"trial_name": null,
|
| 536 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
+
"best_metric": 34.841033935546875,
|
| 3 |
+
"best_model_checkpoint": "/kaggle/working/output/checkpoint-7830",
|
| 4 |
+
"epoch": 6.0,
|
| 5 |
"eval_steps": 500,
|
| 6 |
+
"global_step": 7830,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 502 |
"eval_samples_per_second": 26.47,
|
| 503 |
"eval_steps_per_second": 3.327,
|
| 504 |
"step": 6525
|
| 505 |
+
},
|
| 506 |
+
{
|
| 507 |
+
"epoch": 5.057471264367816,
|
| 508 |
+
"grad_norm": 2.3083884716033936,
|
| 509 |
+
"learning_rate": 4.684051724137931e-05,
|
| 510 |
+
"loss": 33.8987,
|
| 511 |
+
"step": 6600
|
| 512 |
+
},
|
| 513 |
+
{
|
| 514 |
+
"epoch": 5.134099616858237,
|
| 515 |
+
"grad_norm": 2.228327751159668,
|
| 516 |
+
"learning_rate": 4.67926245210728e-05,
|
| 517 |
+
"loss": 33.8189,
|
| 518 |
+
"step": 6700
|
| 519 |
+
},
|
| 520 |
+
{
|
| 521 |
+
"epoch": 5.210727969348659,
|
| 522 |
+
"grad_norm": 3.6814918518066406,
|
| 523 |
+
"learning_rate": 4.6744731800766284e-05,
|
| 524 |
+
"loss": 33.8364,
|
| 525 |
+
"step": 6800
|
| 526 |
+
},
|
| 527 |
+
{
|
| 528 |
+
"epoch": 5.287356321839081,
|
| 529 |
+
"grad_norm": 2.5758285522460938,
|
| 530 |
+
"learning_rate": 4.669683908045977e-05,
|
| 531 |
+
"loss": 33.7093,
|
| 532 |
+
"step": 6900
|
| 533 |
+
},
|
| 534 |
+
{
|
| 535 |
+
"epoch": 5.363984674329502,
|
| 536 |
+
"grad_norm": 4.175839900970459,
|
| 537 |
+
"learning_rate": 4.6648946360153265e-05,
|
| 538 |
+
"loss": 33.6689,
|
| 539 |
+
"step": 7000
|
| 540 |
+
},
|
| 541 |
+
{
|
| 542 |
+
"epoch": 5.440613026819923,
|
| 543 |
+
"grad_norm": 2.213092088699341,
|
| 544 |
+
"learning_rate": 4.6601053639846745e-05,
|
| 545 |
+
"loss": 33.7936,
|
| 546 |
+
"step": 7100
|
| 547 |
+
},
|
| 548 |
+
{
|
| 549 |
+
"epoch": 5.517241379310345,
|
| 550 |
+
"grad_norm": 2.4982571601867676,
|
| 551 |
+
"learning_rate": 4.655316091954023e-05,
|
| 552 |
+
"loss": 33.3686,
|
| 553 |
+
"step": 7200
|
| 554 |
+
},
|
| 555 |
+
{
|
| 556 |
+
"epoch": 5.593869731800766,
|
| 557 |
+
"grad_norm": 3.635983943939209,
|
| 558 |
+
"learning_rate": 4.6505747126436784e-05,
|
| 559 |
+
"loss": 33.5493,
|
| 560 |
+
"step": 7300
|
| 561 |
+
},
|
| 562 |
+
{
|
| 563 |
+
"epoch": 5.670498084291188,
|
| 564 |
+
"grad_norm": 4.315894603729248,
|
| 565 |
+
"learning_rate": 4.645785440613027e-05,
|
| 566 |
+
"loss": 33.6607,
|
| 567 |
+
"step": 7400
|
| 568 |
+
},
|
| 569 |
+
{
|
| 570 |
+
"epoch": 5.747126436781609,
|
| 571 |
+
"grad_norm": 2.6151223182678223,
|
| 572 |
+
"learning_rate": 4.640996168582376e-05,
|
| 573 |
+
"loss": 34.7535,
|
| 574 |
+
"step": 7500
|
| 575 |
+
},
|
| 576 |
+
{
|
| 577 |
+
"epoch": 5.823754789272031,
|
| 578 |
+
"grad_norm": 4.03953218460083,
|
| 579 |
+
"learning_rate": 4.6362068965517244e-05,
|
| 580 |
+
"loss": 33.9865,
|
| 581 |
+
"step": 7600
|
| 582 |
+
},
|
| 583 |
+
{
|
| 584 |
+
"epoch": 5.900383141762452,
|
| 585 |
+
"grad_norm": 2.512362480163574,
|
| 586 |
+
"learning_rate": 4.6314176245210724e-05,
|
| 587 |
+
"loss": 33.0343,
|
| 588 |
+
"step": 7700
|
| 589 |
+
},
|
| 590 |
+
{
|
| 591 |
+
"epoch": 5.977011494252873,
|
| 592 |
+
"grad_norm": 4.745575428009033,
|
| 593 |
+
"learning_rate": 4.626628352490422e-05,
|
| 594 |
+
"loss": 33.4544,
|
| 595 |
+
"step": 7800
|
| 596 |
+
},
|
| 597 |
+
{
|
| 598 |
+
"epoch": 6.0,
|
| 599 |
+
"eval_loss": 34.841033935546875,
|
| 600 |
+
"eval_runtime": 49.3059,
|
| 601 |
+
"eval_samples_per_second": 26.467,
|
| 602 |
+
"eval_steps_per_second": 3.326,
|
| 603 |
+
"step": 7830
|
| 604 |
}
|
| 605 |
],
|
| 606 |
"logging_steps": 100,
|
|
|
|
| 629 |
"attributes": {}
|
| 630 |
}
|
| 631 |
},
|
| 632 |
+
"total_flos": 8443865418283008.0,
|
| 633 |
"train_batch_size": 8,
|
| 634 |
"trial_name": null,
|
| 635 |
"trial_params": null
|