Upload folder using huggingface_hub
Browse files- model-00001-of-00002.safetensors +1 -1
- model-00002-of-00002.safetensors +1 -1
- optimizer.pt +1 -1
- rng_state.pth +1 -1
- scheduler.pt +1 -1
- trainer_state.json +81 -3
model-00001-of-00002.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4991037968
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:50af6cfaef0b66eed5b89c78ecefc7b51d1d495a8ce00bfeeb711d0f5629fcb8
|
| 3 |
size 4991037968
|
model-00002-of-00002.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1610725592
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f2f79bba49da11c703d67f72efb455ac1f3915bbc2a61e271826791d316b3028
|
| 3 |
size 1610725592
|
optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 13203690391
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1ec5d2ff5fb3d54e506057d32c3990d680339983a5fdbdd468dace5050a5a0a1
|
| 3 |
size 13203690391
|
rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14645
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:63aab5945fa6a4f19ef11eca5b6add2fab56216a2af2a42b2f4db10b37425e8e
|
| 3 |
size 14645
|
scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1465
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cde082d406db1a89d52718af57a19af0e3b09ad5a557b076925c22776d5baf59
|
| 3 |
size 1465
|
trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": 4500,
|
| 3 |
"best_metric": 1.2012678384780884,
|
| 4 |
"best_model_checkpoint": "./orpheus-turkish-emotion-finetune/checkpoint-4500",
|
| 5 |
-
"epoch":
|
| 6 |
"eval_steps": 500,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -710,6 +710,84 @@
|
|
| 710 |
"eval_samples_per_second": 11.462,
|
| 711 |
"eval_steps_per_second": 1.473,
|
| 712 |
"step": 4500
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 713 |
}
|
| 714 |
],
|
| 715 |
"logging_steps": 50,
|
|
@@ -729,7 +807,7 @@
|
|
| 729 |
"attributes": {}
|
| 730 |
}
|
| 731 |
},
|
| 732 |
-
"total_flos": 1.
|
| 733 |
"train_batch_size": 1,
|
| 734 |
"trial_name": null,
|
| 735 |
"trial_params": null
|
|
|
|
| 2 |
"best_global_step": 4500,
|
| 3 |
"best_metric": 1.2012678384780884,
|
| 4 |
"best_model_checkpoint": "./orpheus-turkish-emotion-finetune/checkpoint-4500",
|
| 5 |
+
"epoch": 24.882242990654206,
|
| 6 |
"eval_steps": 500,
|
| 7 |
+
"global_step": 5000,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 710 |
"eval_samples_per_second": 11.462,
|
| 711 |
"eval_steps_per_second": 1.473,
|
| 712 |
"step": 4500
|
| 713 |
+
},
|
| 714 |
+
{
|
| 715 |
+
"epoch": 22.642990654205608,
|
| 716 |
+
"grad_norm": 1.0625,
|
| 717 |
+
"learning_rate": 2.7091379149682683e-07,
|
| 718 |
+
"loss": 1.2239,
|
| 719 |
+
"step": 4550
|
| 720 |
+
},
|
| 721 |
+
{
|
| 722 |
+
"epoch": 22.89221183800623,
|
| 723 |
+
"grad_norm": 1.3515625,
|
| 724 |
+
"learning_rate": 2.1738296461569164e-07,
|
| 725 |
+
"loss": 1.2121,
|
| 726 |
+
"step": 4600
|
| 727 |
+
},
|
| 728 |
+
{
|
| 729 |
+
"epoch": 23.139563862928348,
|
| 730 |
+
"grad_norm": 1.2421875,
|
| 731 |
+
"learning_rate": 1.6962246671706872e-07,
|
| 732 |
+
"loss": 1.1973,
|
| 733 |
+
"step": 4650
|
| 734 |
+
},
|
| 735 |
+
{
|
| 736 |
+
"epoch": 23.388785046728973,
|
| 737 |
+
"grad_norm": 1.2578125,
|
| 738 |
+
"learning_rate": 1.2768992185557104e-07,
|
| 739 |
+
"loss": 1.2183,
|
| 740 |
+
"step": 4700
|
| 741 |
+
},
|
| 742 |
+
{
|
| 743 |
+
"epoch": 23.638006230529594,
|
| 744 |
+
"grad_norm": 1.3125,
|
| 745 |
+
"learning_rate": 9.163592253675247e-08,
|
| 746 |
+
"loss": 1.2195,
|
| 747 |
+
"step": 4750
|
| 748 |
+
},
|
| 749 |
+
{
|
| 750 |
+
"epoch": 23.88722741433022,
|
| 751 |
+
"grad_norm": 1.2109375,
|
| 752 |
+
"learning_rate": 6.15039686761748e-08,
|
| 753 |
+
"loss": 1.21,
|
| 754 |
+
"step": 4800
|
| 755 |
+
},
|
| 756 |
+
{
|
| 757 |
+
"epoch": 24.134579439252338,
|
| 758 |
+
"grad_norm": 1.2265625,
|
| 759 |
+
"learning_rate": 3.733041511583768e-08,
|
| 760 |
+
"loss": 1.2056,
|
| 761 |
+
"step": 4850
|
| 762 |
+
},
|
| 763 |
+
{
|
| 764 |
+
"epoch": 24.38380062305296,
|
| 765 |
+
"grad_norm": 1.46875,
|
| 766 |
+
"learning_rate": 1.914442776128622e-08,
|
| 767 |
+
"loss": 1.1913,
|
| 768 |
+
"step": 4900
|
| 769 |
+
},
|
| 770 |
+
{
|
| 771 |
+
"epoch": 24.633021806853584,
|
| 772 |
+
"grad_norm": 1.1796875,
|
| 773 |
+
"learning_rate": 6.9679483923318356e-09,
|
| 774 |
+
"loss": 1.2346,
|
| 775 |
+
"step": 4950
|
| 776 |
+
},
|
| 777 |
+
{
|
| 778 |
+
"epoch": 24.882242990654206,
|
| 779 |
+
"grad_norm": 1.171875,
|
| 780 |
+
"learning_rate": 8.156681898252583e-10,
|
| 781 |
+
"loss": 1.2149,
|
| 782 |
+
"step": 5000
|
| 783 |
+
},
|
| 784 |
+
{
|
| 785 |
+
"epoch": 24.882242990654206,
|
| 786 |
+
"eval_loss": 1.201310396194458,
|
| 787 |
+
"eval_runtime": 15.6169,
|
| 788 |
+
"eval_samples_per_second": 11.462,
|
| 789 |
+
"eval_steps_per_second": 1.473,
|
| 790 |
+
"step": 5000
|
| 791 |
}
|
| 792 |
],
|
| 793 |
"logging_steps": 50,
|
|
|
|
| 807 |
"attributes": {}
|
| 808 |
}
|
| 809 |
},
|
| 810 |
+
"total_flos": 1.3835310591104778e+18,
|
| 811 |
"train_batch_size": 1,
|
| 812 |
"trial_name": null,
|
| 813 |
"trial_params": null
|