Training in progress, step 9200, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 100690288
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e9cbca2f45d6b52d4f7b6ebb7322fe228c587105c317fa5a3486bcddea3632ee
|
| 3 |
size 100690288
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 51345082
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:50254272993d00750d20519785aed6da515890c737ef88754fedb4a1125894be
|
| 3 |
size 51345082
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:02d52ff618a83e9102020a0283499806564b6579fb7c6addaa728c8a804eedd0
|
| 3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:57b75f147ee39bab59d6712296fd45b943162e0434e26215e766d88ba6e6e775
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": 2.2019271850585938,
|
| 3 |
"best_model_checkpoint": "miner_id_24/checkpoint-8400",
|
| 4 |
-
"epoch": 0.
|
| 5 |
"eval_steps": 200,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -6675,6 +6675,154 @@
|
|
| 6675 |
"eval_samples_per_second": 16.176,
|
| 6676 |
"eval_steps_per_second": 8.096,
|
| 6677 |
"step": 9000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6678 |
}
|
| 6679 |
],
|
| 6680 |
"logging_steps": 10,
|
|
@@ -6689,7 +6837,7 @@
|
|
| 6689 |
"early_stopping_threshold": 0.0
|
| 6690 |
},
|
| 6691 |
"attributes": {
|
| 6692 |
-
"early_stopping_patience_counter":
|
| 6693 |
}
|
| 6694 |
},
|
| 6695 |
"TrainerControl": {
|
|
@@ -6698,12 +6846,12 @@
|
|
| 6698 |
"should_evaluate": false,
|
| 6699 |
"should_log": false,
|
| 6700 |
"should_save": true,
|
| 6701 |
-
"should_training_stop":
|
| 6702 |
},
|
| 6703 |
"attributes": {}
|
| 6704 |
}
|
| 6705 |
},
|
| 6706 |
-
"total_flos":
|
| 6707 |
"train_batch_size": 2,
|
| 6708 |
"trial_name": null,
|
| 6709 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": 2.2019271850585938,
|
| 3 |
"best_model_checkpoint": "miner_id_24/checkpoint-8400",
|
| 4 |
+
"epoch": 0.14505206896279887,
|
| 5 |
"eval_steps": 200,
|
| 6 |
+
"global_step": 9200,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 6675 |
"eval_samples_per_second": 16.176,
|
| 6676 |
"eval_steps_per_second": 8.096,
|
| 6677 |
"step": 9000
|
| 6678 |
+
},
|
| 6679 |
+
{
|
| 6680 |
+
"epoch": 0.14205642840813237,
|
| 6681 |
+
"grad_norm": 5.197229862213135,
|
| 6682 |
+
"learning_rate": 0.0001988978579208574,
|
| 6683 |
+
"loss": 9.1033,
|
| 6684 |
+
"step": 9010
|
| 6685 |
+
},
|
| 6686 |
+
{
|
| 6687 |
+
"epoch": 0.14221409370048324,
|
| 6688 |
+
"grad_norm": 4.565831661224365,
|
| 6689 |
+
"learning_rate": 0.00019889541187699707,
|
| 6690 |
+
"loss": 8.8077,
|
| 6691 |
+
"step": 9020
|
| 6692 |
+
},
|
| 6693 |
+
{
|
| 6694 |
+
"epoch": 0.1423717589928341,
|
| 6695 |
+
"grad_norm": 4.8584513664245605,
|
| 6696 |
+
"learning_rate": 0.00019889296313689968,
|
| 6697 |
+
"loss": 9.0339,
|
| 6698 |
+
"step": 9030
|
| 6699 |
+
},
|
| 6700 |
+
{
|
| 6701 |
+
"epoch": 0.14252942428518497,
|
| 6702 |
+
"grad_norm": 4.712079048156738,
|
| 6703 |
+
"learning_rate": 0.00019889051170063194,
|
| 6704 |
+
"loss": 8.8577,
|
| 6705 |
+
"step": 9040
|
| 6706 |
+
},
|
| 6707 |
+
{
|
| 6708 |
+
"epoch": 0.14268708957753584,
|
| 6709 |
+
"grad_norm": 9.281351089477539,
|
| 6710 |
+
"learning_rate": 0.00019888805756826074,
|
| 6711 |
+
"loss": 8.3711,
|
| 6712 |
+
"step": 9050
|
| 6713 |
+
},
|
| 6714 |
+
{
|
| 6715 |
+
"epoch": 0.1428447548698867,
|
| 6716 |
+
"grad_norm": 4.656757354736328,
|
| 6717 |
+
"learning_rate": 0.00019888560073985295,
|
| 6718 |
+
"loss": 9.3672,
|
| 6719 |
+
"step": 9060
|
| 6720 |
+
},
|
| 6721 |
+
{
|
| 6722 |
+
"epoch": 0.14300242016223758,
|
| 6723 |
+
"grad_norm": 5.0217108726501465,
|
| 6724 |
+
"learning_rate": 0.00019888314121547558,
|
| 6725 |
+
"loss": 9.3907,
|
| 6726 |
+
"step": 9070
|
| 6727 |
+
},
|
| 6728 |
+
{
|
| 6729 |
+
"epoch": 0.14316008545458844,
|
| 6730 |
+
"grad_norm": 4.683655738830566,
|
| 6731 |
+
"learning_rate": 0.00019888067899519567,
|
| 6732 |
+
"loss": 8.6511,
|
| 6733 |
+
"step": 9080
|
| 6734 |
+
},
|
| 6735 |
+
{
|
| 6736 |
+
"epoch": 0.1433177507469393,
|
| 6737 |
+
"grad_norm": 4.991017818450928,
|
| 6738 |
+
"learning_rate": 0.00019887821407908033,
|
| 6739 |
+
"loss": 8.6748,
|
| 6740 |
+
"step": 9090
|
| 6741 |
+
},
|
| 6742 |
+
{
|
| 6743 |
+
"epoch": 0.1434754160392902,
|
| 6744 |
+
"grad_norm": 13.076788902282715,
|
| 6745 |
+
"learning_rate": 0.0001988757464671968,
|
| 6746 |
+
"loss": 8.3435,
|
| 6747 |
+
"step": 9100
|
| 6748 |
+
},
|
| 6749 |
+
{
|
| 6750 |
+
"epoch": 0.14363308133164107,
|
| 6751 |
+
"grad_norm": 4.446813583374023,
|
| 6752 |
+
"learning_rate": 0.00019887327615961234,
|
| 6753 |
+
"loss": 9.5288,
|
| 6754 |
+
"step": 9110
|
| 6755 |
+
},
|
| 6756 |
+
{
|
| 6757 |
+
"epoch": 0.14379074662399194,
|
| 6758 |
+
"grad_norm": 4.752020835876465,
|
| 6759 |
+
"learning_rate": 0.00019887080315639428,
|
| 6760 |
+
"loss": 9.3455,
|
| 6761 |
+
"step": 9120
|
| 6762 |
+
},
|
| 6763 |
+
{
|
| 6764 |
+
"epoch": 0.1439484119163428,
|
| 6765 |
+
"grad_norm": 5.036296367645264,
|
| 6766 |
+
"learning_rate": 0.0001988683274576101,
|
| 6767 |
+
"loss": 8.9484,
|
| 6768 |
+
"step": 9130
|
| 6769 |
+
},
|
| 6770 |
+
{
|
| 6771 |
+
"epoch": 0.14410607720869367,
|
| 6772 |
+
"grad_norm": 4.9117841720581055,
|
| 6773 |
+
"learning_rate": 0.0001988658490633272,
|
| 6774 |
+
"loss": 8.4929,
|
| 6775 |
+
"step": 9140
|
| 6776 |
+
},
|
| 6777 |
+
{
|
| 6778 |
+
"epoch": 0.14426374250104454,
|
| 6779 |
+
"grad_norm": 13.374736785888672,
|
| 6780 |
+
"learning_rate": 0.00019886336797361325,
|
| 6781 |
+
"loss": 8.4415,
|
| 6782 |
+
"step": 9150
|
| 6783 |
+
},
|
| 6784 |
+
{
|
| 6785 |
+
"epoch": 0.1444214077933954,
|
| 6786 |
+
"grad_norm": 4.678138256072998,
|
| 6787 |
+
"learning_rate": 0.00019886088418853581,
|
| 6788 |
+
"loss": 9.2699,
|
| 6789 |
+
"step": 9160
|
| 6790 |
+
},
|
| 6791 |
+
{
|
| 6792 |
+
"epoch": 0.14457907308574627,
|
| 6793 |
+
"grad_norm": 6.370858669281006,
|
| 6794 |
+
"learning_rate": 0.00019885839770816268,
|
| 6795 |
+
"loss": 8.5141,
|
| 6796 |
+
"step": 9170
|
| 6797 |
+
},
|
| 6798 |
+
{
|
| 6799 |
+
"epoch": 0.14473673837809714,
|
| 6800 |
+
"grad_norm": 5.328512191772461,
|
| 6801 |
+
"learning_rate": 0.0001988559085325616,
|
| 6802 |
+
"loss": 8.5896,
|
| 6803 |
+
"step": 9180
|
| 6804 |
+
},
|
| 6805 |
+
{
|
| 6806 |
+
"epoch": 0.144894403670448,
|
| 6807 |
+
"grad_norm": 5.4744391441345215,
|
| 6808 |
+
"learning_rate": 0.0001988534166618004,
|
| 6809 |
+
"loss": 8.6385,
|
| 6810 |
+
"step": 9190
|
| 6811 |
+
},
|
| 6812 |
+
{
|
| 6813 |
+
"epoch": 0.14505206896279887,
|
| 6814 |
+
"grad_norm": 99.89271545410156,
|
| 6815 |
+
"learning_rate": 0.00019885092209594708,
|
| 6816 |
+
"loss": 8.5823,
|
| 6817 |
+
"step": 9200
|
| 6818 |
+
},
|
| 6819 |
+
{
|
| 6820 |
+
"epoch": 0.14505206896279887,
|
| 6821 |
+
"eval_loss": 2.203756093978882,
|
| 6822 |
+
"eval_runtime": 62.7646,
|
| 6823 |
+
"eval_samples_per_second": 16.203,
|
| 6824 |
+
"eval_steps_per_second": 8.11,
|
| 6825 |
+
"step": 9200
|
| 6826 |
}
|
| 6827 |
],
|
| 6828 |
"logging_steps": 10,
|
|
|
|
| 6837 |
"early_stopping_threshold": 0.0
|
| 6838 |
},
|
| 6839 |
"attributes": {
|
| 6840 |
+
"early_stopping_patience_counter": 4
|
| 6841 |
}
|
| 6842 |
},
|
| 6843 |
"TrainerControl": {
|
|
|
|
| 6846 |
"should_evaluate": false,
|
| 6847 |
"should_log": false,
|
| 6848 |
"should_save": true,
|
| 6849 |
+
"should_training_stop": true
|
| 6850 |
},
|
| 6851 |
"attributes": {}
|
| 6852 |
}
|
| 6853 |
},
|
| 6854 |
+
"total_flos": 1.0089740855200973e+17,
|
| 6855 |
"train_batch_size": 2,
|
| 6856 |
"trial_name": null,
|
| 6857 |
"trial_params": null
|