fguryel commited on
Commit
435751c
·
verified ·
1 Parent(s): 1be0b76

Upload folder using huggingface_hub

Browse files
model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:48ee1c52efa36b1ddbf25bc4e4ed55d0578291857b8b679332606fbc3fb85339
3
  size 4991037968
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1780b264366447965d82a1ff26d8bbc1d6b9db6fa345791f12a94b630ed3320b
3
  size 4991037968
model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8b18e90d4d7e18d06da89e473f5961ddfb78ace4047b02033b3bcfdd13e431ab
3
  size 1610725592
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9516d1ac25e339e5d92064c18054b82148dec67e44b80999621ec4892c5a09f9
3
  size 1610725592
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3e96ea5fa62813d9d6515dbcec2df5fe94ddafe752d31554765323bbd9612682
3
  size 13203690391
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aa866c7589021a83baaeab8023d7f3e747b22b1c43b3a6585cff333fac7aca55
3
  size 13203690391
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a804dd9b4962bc1e7c8e5b51c83ce95f04ab0a366340b47fc4849e7d4ecffd6d
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8bd75f3b287b15c92e6927ee3d2a9e3e9a7c2ea768e141eb8d1ab87cfbf9392a
3
  size 14645
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1d4e59b60cb34c3dcef59040cbee17beefe5c2b969d4d9004a66e12c73ae4898
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c2994ac6b75a3e5e5a6b01d6da8955b44535bb0896dc728ab36a422e8d44ebdf
3
  size 1465
trainer_state.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
- "best_global_step": 3500,
3
- "best_metric": 1.2014065980911255,
4
- "best_model_checkpoint": "./orpheus-turkish-emotion-finetune/checkpoint-3500",
5
- "epoch": 17.418691588785048,
6
  "eval_steps": 500,
7
- "global_step": 3500,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -554,6 +554,162 @@
554
  "eval_samples_per_second": 11.474,
555
  "eval_steps_per_second": 1.474,
556
  "step": 3500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
557
  }
558
  ],
559
  "logging_steps": 50,
@@ -573,7 +729,7 @@
573
  "attributes": {}
574
  }
575
  },
576
- "total_flos": 9.686172158027366e+17,
577
  "train_batch_size": 1,
578
  "trial_name": null,
579
  "trial_params": null
 
1
  {
2
+ "best_global_step": 4500,
3
+ "best_metric": 1.2012678384780884,
4
+ "best_model_checkpoint": "./orpheus-turkish-emotion-finetune/checkpoint-4500",
5
+ "epoch": 22.393769470404983,
6
  "eval_steps": 500,
7
+ "global_step": 4500,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
554
  "eval_samples_per_second": 11.474,
555
  "eval_steps_per_second": 1.474,
556
  "step": 3500
557
+ },
558
+ {
559
+ "epoch": 17.66791277258567,
560
+ "grad_norm": 1.046875,
561
+ "learning_rate": 2.4063399404745724e-06,
562
+ "loss": 1.2199,
563
+ "step": 3550
564
+ },
565
+ {
566
+ "epoch": 17.917133956386294,
567
+ "grad_norm": 1.625,
568
+ "learning_rate": 2.2594457607888917e-06,
569
+ "loss": 1.217,
570
+ "step": 3600
571
+ },
572
+ {
573
+ "epoch": 18.16448598130841,
574
+ "grad_norm": 1.3515625,
575
+ "learning_rate": 2.115858117971553e-06,
576
+ "loss": 1.2021,
577
+ "step": 3650
578
+ },
579
+ {
580
+ "epoch": 18.413707165109034,
581
+ "grad_norm": 1.484375,
582
+ "learning_rate": 1.9757502535618137e-06,
583
+ "loss": 1.2159,
584
+ "step": 3700
585
+ },
586
+ {
587
+ "epoch": 18.662928348909656,
588
+ "grad_norm": 1.015625,
589
+ "learning_rate": 1.839291210672407e-06,
590
+ "loss": 1.2117,
591
+ "step": 3750
592
+ },
593
+ {
594
+ "epoch": 18.91214953271028,
595
+ "grad_norm": 1.234375,
596
+ "learning_rate": 1.7066456300354462e-06,
597
+ "loss": 1.225,
598
+ "step": 3800
599
+ },
600
+ {
601
+ "epoch": 19.1595015576324,
602
+ "grad_norm": 1.390625,
603
+ "learning_rate": 1.577973551359877e-06,
604
+ "loss": 1.2239,
605
+ "step": 3850
606
+ },
607
+ {
608
+ "epoch": 19.40872274143302,
609
+ "grad_norm": 1.6015625,
610
+ "learning_rate": 1.453430220240178e-06,
611
+ "loss": 1.209,
612
+ "step": 3900
613
+ },
614
+ {
615
+ "epoch": 19.657943925233646,
616
+ "grad_norm": 1.203125,
617
+ "learning_rate": 1.333165900849255e-06,
618
+ "loss": 1.2148,
619
+ "step": 3950
620
+ },
621
+ {
622
+ "epoch": 19.907165109034267,
623
+ "grad_norm": 1.1484375,
624
+ "learning_rate": 1.2173256946415214e-06,
625
+ "loss": 1.2024,
626
+ "step": 4000
627
+ },
628
+ {
629
+ "epoch": 19.907165109034267,
630
+ "eval_loss": 1.2013256549835205,
631
+ "eval_runtime": 15.5958,
632
+ "eval_samples_per_second": 11.477,
633
+ "eval_steps_per_second": 1.475,
634
+ "step": 4000
635
+ },
636
+ {
637
+ "epoch": 20.154517133956386,
638
+ "grad_norm": 1.1328125,
639
+ "learning_rate": 1.106049365284918e-06,
640
+ "loss": 1.2362,
641
+ "step": 4050
642
+ },
643
+ {
644
+ "epoch": 20.40373831775701,
645
+ "grad_norm": 1.0078125,
646
+ "learning_rate": 9.994711700330779e-07,
647
+ "loss": 1.2212,
648
+ "step": 4100
649
+ },
650
+ {
651
+ "epoch": 20.652959501557632,
652
+ "grad_norm": 1.5078125,
653
+ "learning_rate": 8.97719697741104e-07,
654
+ "loss": 1.1908,
655
+ "step": 4150
656
+ },
657
+ {
658
+ "epoch": 20.902180685358257,
659
+ "grad_norm": 1.0390625,
660
+ "learning_rate": 8.009177137203794e-07,
661
+ "loss": 1.2261,
662
+ "step": 4200
663
+ },
664
+ {
665
+ "epoch": 21.149532710280372,
666
+ "grad_norm": 1.046875,
667
+ "learning_rate": 7.091820116196152e-07,
668
+ "loss": 1.1987,
669
+ "step": 4250
670
+ },
671
+ {
672
+ "epoch": 21.398753894080997,
673
+ "grad_norm": 1.46875,
674
+ "learning_rate": 6.2262327251084e-07,
675
+ "loss": 1.2089,
676
+ "step": 4300
677
+ },
678
+ {
679
+ "epoch": 21.64797507788162,
680
+ "grad_norm": 1.1796875,
681
+ "learning_rate": 5.413459313503272e-07,
682
+ "loss": 1.2162,
683
+ "step": 4350
684
+ },
685
+ {
686
+ "epoch": 21.897196261682243,
687
+ "grad_norm": 1.0859375,
688
+ "learning_rate": 4.654480509756082e-07,
689
+ "loss": 1.216,
690
+ "step": 4400
691
+ },
692
+ {
693
+ "epoch": 22.144548286604362,
694
+ "grad_norm": 1.484375,
695
+ "learning_rate": 3.9502120379057764e-07,
696
+ "loss": 1.2089,
697
+ "step": 4450
698
+ },
699
+ {
700
+ "epoch": 22.393769470404983,
701
+ "grad_norm": 1.171875,
702
+ "learning_rate": 3.301503612814444e-07,
703
+ "loss": 1.2161,
704
+ "step": 4500
705
+ },
706
+ {
707
+ "epoch": 22.393769470404983,
708
+ "eval_loss": 1.2012678384780884,
709
+ "eval_runtime": 15.6175,
710
+ "eval_samples_per_second": 11.462,
711
+ "eval_steps_per_second": 1.473,
712
+ "step": 4500
713
  }
714
  ],
715
  "logging_steps": 50,
 
729
  "attributes": {}
730
  }
731
  },
732
+ "total_flos": 1.2451918079066112e+18,
733
  "train_batch_size": 1,
734
  "trial_name": null,
735
  "trial_params": null