Training in progress, epoch 2, checkpoint
Browse files- last-checkpoint/adapter_model.safetensors +1 -1
- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +240 -6
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 364930784
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5910e0a282e4df32eef0552c8b3139c44b0817839e4759d05b0434f2de570164
|
| 3 |
size 364930784
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 185530443
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:985a31fdc1b7c2050bb3569366a09e87d28883fa8a2c2d07e33459da0df3290e
|
| 3 |
size 185530443
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15429
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cede62ea1101cd7c1a2b08854972b212cc8c8285489e3cdb8d86af1f7b9e8d9b
|
| 3 |
size 15429
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15429
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a96cad50eef2317d5b06d359661294bcf4e10472a2fe4aa3e2c96c25afaf8fe4
|
| 3 |
size 15429
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15429
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:653c374a59cc448606819acd79f0d50657fef2b2d01bddc9aff0cb92325491aa
|
| 3 |
size 15429
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15429
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2b0ed1fb8354c06bf0eab3c9b2dbb6b716f1ca765c82ae4407881d1f78bd018a
|
| 3 |
size 15429
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1465
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4dbed6f9227c6453885e3fcec169430d1d02615fe4f493d1e5b46420af58b713
|
| 3 |
size 1465
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,10 +1,10 @@
|
|
| 1 |
{
|
| 2 |
"best_global_step": null,
|
| 3 |
-
"best_metric": 0.
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch": 2.
|
| 6 |
"eval_steps": 50,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -7496,6 +7496,240 @@
|
|
| 7496 |
"eval_samples_per_second": 425.989,
|
| 7497 |
"eval_steps_per_second": 13.324,
|
| 7498 |
"step": 4800
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7499 |
}
|
| 7500 |
],
|
| 7501 |
"logging_steps": 5,
|
|
@@ -7510,7 +7744,7 @@
|
|
| 7510 |
"early_stopping_threshold": 0.0001
|
| 7511 |
},
|
| 7512 |
"attributes": {
|
| 7513 |
-
"early_stopping_patience_counter":
|
| 7514 |
}
|
| 7515 |
},
|
| 7516 |
"TrainerControl": {
|
|
@@ -7519,12 +7753,12 @@
|
|
| 7519 |
"should_evaluate": false,
|
| 7520 |
"should_log": false,
|
| 7521 |
"should_save": true,
|
| 7522 |
-
"should_training_stop":
|
| 7523 |
},
|
| 7524 |
"attributes": {}
|
| 7525 |
}
|
| 7526 |
},
|
| 7527 |
-
"total_flos": 3.
|
| 7528 |
"train_batch_size": 8,
|
| 7529 |
"trial_name": null,
|
| 7530 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
"best_global_step": null,
|
| 3 |
+
"best_metric": 0.5373095273971558,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 2.9276734210915545,
|
| 6 |
"eval_steps": 50,
|
| 7 |
+
"global_step": 4950,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 7496 |
"eval_samples_per_second": 425.989,
|
| 7497 |
"eval_steps_per_second": 13.324,
|
| 7498 |
"step": 4800
|
| 7499 |
+
},
|
| 7500 |
+
{
|
| 7501 |
+
"epoch": 2.841887294778879,
|
| 7502 |
+
"grad_norm": 0.22534750401973724,
|
| 7503 |
+
"learning_rate": 1.6351597887982846e-06,
|
| 7504 |
+
"loss": 0.581,
|
| 7505 |
+
"step": 4805
|
| 7506 |
+
},
|
| 7507 |
+
{
|
| 7508 |
+
"epoch": 2.8448454370655227,
|
| 7509 |
+
"grad_norm": 0.23928098380565643,
|
| 7510 |
+
"learning_rate": 1.5529003378542404e-06,
|
| 7511 |
+
"loss": 0.5837,
|
| 7512 |
+
"step": 4810
|
| 7513 |
+
},
|
| 7514 |
+
{
|
| 7515 |
+
"epoch": 2.847803579352167,
|
| 7516 |
+
"grad_norm": 0.21647833287715912,
|
| 7517 |
+
"learning_rate": 1.4727559570263333e-06,
|
| 7518 |
+
"loss": 0.5701,
|
| 7519 |
+
"step": 4815
|
| 7520 |
+
},
|
| 7521 |
+
{
|
| 7522 |
+
"epoch": 2.850761721638811,
|
| 7523 |
+
"grad_norm": 0.2176506221294403,
|
| 7524 |
+
"learning_rate": 1.3947274699220398e-06,
|
| 7525 |
+
"loss": 0.5626,
|
| 7526 |
+
"step": 4820
|
| 7527 |
+
},
|
| 7528 |
+
{
|
| 7529 |
+
"epoch": 2.8537198639254546,
|
| 7530 |
+
"grad_norm": 0.21065934002399445,
|
| 7531 |
+
"learning_rate": 1.3188156784048088e-06,
|
| 7532 |
+
"loss": 0.5686,
|
| 7533 |
+
"step": 4825
|
| 7534 |
+
},
|
| 7535 |
+
{
|
| 7536 |
+
"epoch": 2.856678006212099,
|
| 7537 |
+
"grad_norm": 0.22182585299015045,
|
| 7538 |
+
"learning_rate": 1.2450213625857274e-06,
|
| 7539 |
+
"loss": 0.5761,
|
| 7540 |
+
"step": 4830
|
| 7541 |
+
},
|
| 7542 |
+
{
|
| 7543 |
+
"epoch": 2.8596361484987427,
|
| 7544 |
+
"grad_norm": 0.21298271417617798,
|
| 7545 |
+
"learning_rate": 1.1733452808156017e-06,
|
| 7546 |
+
"loss": 0.5867,
|
| 7547 |
+
"step": 4835
|
| 7548 |
+
},
|
| 7549 |
+
{
|
| 7550 |
+
"epoch": 2.8625942907853865,
|
| 7551 |
+
"grad_norm": 0.229048490524292,
|
| 7552 |
+
"learning_rate": 1.103788169677036e-06,
|
| 7553 |
+
"loss": 0.589,
|
| 7554 |
+
"step": 4840
|
| 7555 |
+
},
|
| 7556 |
+
{
|
| 7557 |
+
"epoch": 2.865552433072031,
|
| 7558 |
+
"grad_norm": 0.2213655412197113,
|
| 7559 |
+
"learning_rate": 1.0363507439769986e-06,
|
| 7560 |
+
"loss": 0.5597,
|
| 7561 |
+
"step": 4845
|
| 7562 |
+
},
|
| 7563 |
+
{
|
| 7564 |
+
"epoch": 2.8685105753586746,
|
| 7565 |
+
"grad_norm": 0.21822868287563324,
|
| 7566 |
+
"learning_rate": 9.7103369673936e-07,
|
| 7567 |
+
"loss": 0.5712,
|
| 7568 |
+
"step": 4850
|
| 7569 |
+
},
|
| 7570 |
+
{
|
| 7571 |
+
"epoch": 2.8685105753586746,
|
| 7572 |
+
"eval_loss": 0.5373578667640686,
|
| 7573 |
+
"eval_runtime": 15.1783,
|
| 7574 |
+
"eval_samples_per_second": 427.584,
|
| 7575 |
+
"eval_steps_per_second": 13.374,
|
| 7576 |
+
"step": 4850
|
| 7577 |
+
},
|
| 7578 |
+
{
|
| 7579 |
+
"epoch": 2.871468717645319,
|
| 7580 |
+
"grad_norm": 0.22016650438308716,
|
| 7581 |
+
"learning_rate": 9.078376991978266e-07,
|
| 7582 |
+
"loss": 0.5587,
|
| 7583 |
+
"step": 4855
|
| 7584 |
+
},
|
| 7585 |
+
{
|
| 7586 |
+
"epoch": 2.8744268599319627,
|
| 7587 |
+
"grad_norm": 0.23947712779045105,
|
| 7588 |
+
"learning_rate": 8.467634007890796e-07,
|
| 7589 |
+
"loss": 0.5841,
|
| 7590 |
+
"step": 4860
|
| 7591 |
+
},
|
| 7592 |
+
{
|
| 7593 |
+
"epoch": 2.877385002218607,
|
| 7594 |
+
"grad_norm": 0.2243824005126953,
|
| 7595 |
+
"learning_rate": 7.878114291460063e-07,
|
| 7596 |
+
"loss": 0.5736,
|
| 7597 |
+
"step": 4865
|
| 7598 |
+
},
|
| 7599 |
+
{
|
| 7600 |
+
"epoch": 2.8803431445052508,
|
| 7601 |
+
"grad_norm": 0.22133906185626984,
|
| 7602 |
+
"learning_rate": 7.309823900913461e-07,
|
| 7603 |
+
"loss": 0.5764,
|
| 7604 |
+
"step": 4870
|
| 7605 |
+
},
|
| 7606 |
+
{
|
| 7607 |
+
"epoch": 2.8833012867918946,
|
| 7608 |
+
"grad_norm": 0.21976634860038757,
|
| 7609 |
+
"learning_rate": 6.76276867631405e-07,
|
| 7610 |
+
"loss": 0.5699,
|
| 7611 |
+
"step": 4875
|
| 7612 |
+
},
|
| 7613 |
+
{
|
| 7614 |
+
"epoch": 2.886259429078539,
|
| 7615 |
+
"grad_norm": 0.22008314728736877,
|
| 7616 |
+
"learning_rate": 6.236954239500471e-07,
|
| 7617 |
+
"loss": 0.5527,
|
| 7618 |
+
"step": 4880
|
| 7619 |
+
},
|
| 7620 |
+
{
|
| 7621 |
+
"epoch": 2.8892175713651826,
|
| 7622 |
+
"grad_norm": 0.22807146608829498,
|
| 7623 |
+
"learning_rate": 5.732385994029618e-07,
|
| 7624 |
+
"loss": 0.5943,
|
| 7625 |
+
"step": 4885
|
| 7626 |
+
},
|
| 7627 |
+
{
|
| 7628 |
+
"epoch": 2.8921757136518265,
|
| 7629 |
+
"grad_norm": 0.22938776016235352,
|
| 7630 |
+
"learning_rate": 5.249069125121154e-07,
|
| 7631 |
+
"loss": 0.5825,
|
| 7632 |
+
"step": 4890
|
| 7633 |
+
},
|
| 7634 |
+
{
|
| 7635 |
+
"epoch": 2.8951338559384707,
|
| 7636 |
+
"grad_norm": 0.20941923558712006,
|
| 7637 |
+
"learning_rate": 4.787008599603642e-07,
|
| 7638 |
+
"loss": 0.5685,
|
| 7639 |
+
"step": 4895
|
| 7640 |
+
},
|
| 7641 |
+
{
|
| 7642 |
+
"epoch": 2.8980919982251145,
|
| 7643 |
+
"grad_norm": 0.22085338830947876,
|
| 7644 |
+
"learning_rate": 4.346209165863655e-07,
|
| 7645 |
+
"loss": 0.5588,
|
| 7646 |
+
"step": 4900
|
| 7647 |
+
},
|
| 7648 |
+
{
|
| 7649 |
+
"epoch": 2.8980919982251145,
|
| 7650 |
+
"eval_loss": 0.5373329520225525,
|
| 7651 |
+
"eval_runtime": 15.2559,
|
| 7652 |
+
"eval_samples_per_second": 425.409,
|
| 7653 |
+
"eval_steps_per_second": 13.306,
|
| 7654 |
+
"step": 4900
|
| 7655 |
+
},
|
| 7656 |
+
{
|
| 7657 |
+
"epoch": 2.9010501405117584,
|
| 7658 |
+
"grad_norm": 0.22424866259098053,
|
| 7659 |
+
"learning_rate": 3.926675353797443e-07,
|
| 7660 |
+
"loss": 0.5725,
|
| 7661 |
+
"step": 4905
|
| 7662 |
+
},
|
| 7663 |
+
{
|
| 7664 |
+
"epoch": 2.9040082827984026,
|
| 7665 |
+
"grad_norm": 0.2182874232530594,
|
| 7666 |
+
"learning_rate": 3.5284114747641856e-07,
|
| 7667 |
+
"loss": 0.5582,
|
| 7668 |
+
"step": 4910
|
| 7669 |
+
},
|
| 7670 |
+
{
|
| 7671 |
+
"epoch": 2.9069664250850464,
|
| 7672 |
+
"grad_norm": 0.21973784267902374,
|
| 7673 |
+
"learning_rate": 3.151421621541335e-07,
|
| 7674 |
+
"loss": 0.5684,
|
| 7675 |
+
"step": 4915
|
| 7676 |
+
},
|
| 7677 |
+
{
|
| 7678 |
+
"epoch": 2.9099245673716907,
|
| 7679 |
+
"grad_norm": 0.2083846479654312,
|
| 7680 |
+
"learning_rate": 2.795709668283172e-07,
|
| 7681 |
+
"loss": 0.578,
|
| 7682 |
+
"step": 4920
|
| 7683 |
+
},
|
| 7684 |
+
{
|
| 7685 |
+
"epoch": 2.9128827096583345,
|
| 7686 |
+
"grad_norm": 0.2196836769580841,
|
| 7687 |
+
"learning_rate": 2.4612792704798287e-07,
|
| 7688 |
+
"loss": 0.5603,
|
| 7689 |
+
"step": 4925
|
| 7690 |
+
},
|
| 7691 |
+
{
|
| 7692 |
+
"epoch": 2.9158408519449788,
|
| 7693 |
+
"grad_norm": 0.22254040837287903,
|
| 7694 |
+
"learning_rate": 2.1481338649216013e-07,
|
| 7695 |
+
"loss": 0.5526,
|
| 7696 |
+
"step": 4930
|
| 7697 |
+
},
|
| 7698 |
+
{
|
| 7699 |
+
"epoch": 2.9187989942316226,
|
| 7700 |
+
"grad_norm": 0.2200893610715866,
|
| 7701 |
+
"learning_rate": 1.8562766696618855e-07,
|
| 7702 |
+
"loss": 0.5661,
|
| 7703 |
+
"step": 4935
|
| 7704 |
+
},
|
| 7705 |
+
{
|
| 7706 |
+
"epoch": 2.9217571365182664,
|
| 7707 |
+
"grad_norm": 0.22102928161621094,
|
| 7708 |
+
"learning_rate": 1.5857106839847136e-07,
|
| 7709 |
+
"loss": 0.5905,
|
| 7710 |
+
"step": 4940
|
| 7711 |
+
},
|
| 7712 |
+
{
|
| 7713 |
+
"epoch": 2.9247152788049107,
|
| 7714 |
+
"grad_norm": 0.2244081199169159,
|
| 7715 |
+
"learning_rate": 1.3364386883745962e-07,
|
| 7716 |
+
"loss": 0.5743,
|
| 7717 |
+
"step": 4945
|
| 7718 |
+
},
|
| 7719 |
+
{
|
| 7720 |
+
"epoch": 2.9276734210915545,
|
| 7721 |
+
"grad_norm": 0.23028399050235748,
|
| 7722 |
+
"learning_rate": 1.1084632444868224e-07,
|
| 7723 |
+
"loss": 0.5852,
|
| 7724 |
+
"step": 4950
|
| 7725 |
+
},
|
| 7726 |
+
{
|
| 7727 |
+
"epoch": 2.9276734210915545,
|
| 7728 |
+
"eval_loss": 0.5373095273971558,
|
| 7729 |
+
"eval_runtime": 15.2077,
|
| 7730 |
+
"eval_samples_per_second": 426.758,
|
| 7731 |
+
"eval_steps_per_second": 13.349,
|
| 7732 |
+
"step": 4950
|
| 7733 |
}
|
| 7734 |
],
|
| 7735 |
"logging_steps": 5,
|
|
|
|
| 7744 |
"early_stopping_threshold": 0.0001
|
| 7745 |
},
|
| 7746 |
"attributes": {
|
| 7747 |
+
"early_stopping_patience_counter": 3
|
| 7748 |
}
|
| 7749 |
},
|
| 7750 |
"TrainerControl": {
|
|
|
|
| 7753 |
"should_evaluate": false,
|
| 7754 |
"should_log": false,
|
| 7755 |
"should_save": true,
|
| 7756 |
+
"should_training_stop": true
|
| 7757 |
},
|
| 7758 |
"attributes": {}
|
| 7759 |
}
|
| 7760 |
},
|
| 7761 |
+
"total_flos": 3.873445665417724e+18,
|
| 7762 |
"train_batch_size": 8,
|
| 7763 |
"trial_name": null,
|
| 7764 |
"trial_params": null
|