fguryel commited on
Commit
4aa93ee
·
verified ·
1 Parent(s): 871d82e

Upload folder using huggingface_hub

Browse files
model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1780b264366447965d82a1ff26d8bbc1d6b9db6fa345791f12a94b630ed3320b
3
  size 4991037968
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:50af6cfaef0b66eed5b89c78ecefc7b51d1d495a8ce00bfeeb711d0f5629fcb8
3
  size 4991037968
model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9516d1ac25e339e5d92064c18054b82148dec67e44b80999621ec4892c5a09f9
3
  size 1610725592
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f2f79bba49da11c703d67f72efb455ac1f3915bbc2a61e271826791d316b3028
3
  size 1610725592
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:aa866c7589021a83baaeab8023d7f3e747b22b1c43b3a6585cff333fac7aca55
3
  size 13203690391
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1ec5d2ff5fb3d54e506057d32c3990d680339983a5fdbdd468dace5050a5a0a1
3
  size 13203690391
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8bd75f3b287b15c92e6927ee3d2a9e3e9a7c2ea768e141eb8d1ab87cfbf9392a
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:63aab5945fa6a4f19ef11eca5b6add2fab56216a2af2a42b2f4db10b37425e8e
3
  size 14645
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c2994ac6b75a3e5e5a6b01d6da8955b44535bb0896dc728ab36a422e8d44ebdf
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cde082d406db1a89d52718af57a19af0e3b09ad5a557b076925c22776d5baf59
3
  size 1465
trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": 4500,
3
  "best_metric": 1.2012678384780884,
4
  "best_model_checkpoint": "./orpheus-turkish-emotion-finetune/checkpoint-4500",
5
- "epoch": 22.393769470404983,
6
  "eval_steps": 500,
7
- "global_step": 4500,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -710,6 +710,84 @@
710
  "eval_samples_per_second": 11.462,
711
  "eval_steps_per_second": 1.473,
712
  "step": 4500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
713
  }
714
  ],
715
  "logging_steps": 50,
@@ -729,7 +807,7 @@
729
  "attributes": {}
730
  }
731
  },
732
- "total_flos": 1.2451918079066112e+18,
733
  "train_batch_size": 1,
734
  "trial_name": null,
735
  "trial_params": null
 
2
  "best_global_step": 4500,
3
  "best_metric": 1.2012678384780884,
4
  "best_model_checkpoint": "./orpheus-turkish-emotion-finetune/checkpoint-4500",
5
+ "epoch": 24.882242990654206,
6
  "eval_steps": 500,
7
+ "global_step": 5000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
710
  "eval_samples_per_second": 11.462,
711
  "eval_steps_per_second": 1.473,
712
  "step": 4500
713
+ },
714
+ {
715
+ "epoch": 22.642990654205608,
716
+ "grad_norm": 1.0625,
717
+ "learning_rate": 2.7091379149682683e-07,
718
+ "loss": 1.2239,
719
+ "step": 4550
720
+ },
721
+ {
722
+ "epoch": 22.89221183800623,
723
+ "grad_norm": 1.3515625,
724
+ "learning_rate": 2.1738296461569164e-07,
725
+ "loss": 1.2121,
726
+ "step": 4600
727
+ },
728
+ {
729
+ "epoch": 23.139563862928348,
730
+ "grad_norm": 1.2421875,
731
+ "learning_rate": 1.6962246671706872e-07,
732
+ "loss": 1.1973,
733
+ "step": 4650
734
+ },
735
+ {
736
+ "epoch": 23.388785046728973,
737
+ "grad_norm": 1.2578125,
738
+ "learning_rate": 1.2768992185557104e-07,
739
+ "loss": 1.2183,
740
+ "step": 4700
741
+ },
742
+ {
743
+ "epoch": 23.638006230529594,
744
+ "grad_norm": 1.3125,
745
+ "learning_rate": 9.163592253675247e-08,
746
+ "loss": 1.2195,
747
+ "step": 4750
748
+ },
749
+ {
750
+ "epoch": 23.88722741433022,
751
+ "grad_norm": 1.2109375,
752
+ "learning_rate": 6.15039686761748e-08,
753
+ "loss": 1.21,
754
+ "step": 4800
755
+ },
756
+ {
757
+ "epoch": 24.134579439252338,
758
+ "grad_norm": 1.2265625,
759
+ "learning_rate": 3.733041511583768e-08,
760
+ "loss": 1.2056,
761
+ "step": 4850
762
+ },
763
+ {
764
+ "epoch": 24.38380062305296,
765
+ "grad_norm": 1.46875,
766
+ "learning_rate": 1.914442776128622e-08,
767
+ "loss": 1.1913,
768
+ "step": 4900
769
+ },
770
+ {
771
+ "epoch": 24.633021806853584,
772
+ "grad_norm": 1.1796875,
773
+ "learning_rate": 6.9679483923318356e-09,
774
+ "loss": 1.2346,
775
+ "step": 4950
776
+ },
777
+ {
778
+ "epoch": 24.882242990654206,
779
+ "grad_norm": 1.171875,
780
+ "learning_rate": 8.156681898252583e-10,
781
+ "loss": 1.2149,
782
+ "step": 5000
783
+ },
784
+ {
785
+ "epoch": 24.882242990654206,
786
+ "eval_loss": 1.201310396194458,
787
+ "eval_runtime": 15.6169,
788
+ "eval_samples_per_second": 11.462,
789
+ "eval_steps_per_second": 1.473,
790
+ "step": 5000
791
  }
792
  ],
793
  "logging_steps": 50,
 
807
  "attributes": {}
808
  }
809
  },
810
+ "total_flos": 1.3835310591104778e+18,
811
  "train_batch_size": 1,
812
  "trial_name": null,
813
  "trial_params": null