robertou2 commited on
Commit
a2ead94
·
verified ·
1 Parent(s): 8998d48

Upload folder using huggingface_hub

Browse files
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3161508c0505b1add3d5708f8dcbe8451ca9f16d9c4da4eb15f72f0213ea5ab3
3
  size 369133600
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:213b096de341a50e0a4985859c6ea12250ce2d41200faa95746f3833d77778ba
3
  size 369133600
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f2de0050503da4c1dfedc106d96c40be2c5e711da4076acf99032873e9af7004
3
  size 738413771
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:96b2ad4cb92f0741196486f703f3eac971910ec2af67ea8dced1c39144ae41bd
3
  size 738413771
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e961bd000aad0805f4df299e9e54c009416c83b5abff201439df38b2efb708fc
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a98db1d36dba143442348323cec201512ec884401e693908cf5136c8d261220e
3
  size 14645
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3ba3ad0939c5c784a2980c55ea2ede819b84a24d205e18f93e06504838daa120
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:62f9a2cf5827a9b18dca293c2a36bb613a8624ee231ceb47fae6e66a15cfcac4
3
  size 1465
trainer_state.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
- "best_global_step": 59,
3
- "best_metric": 0.6262253522872925,
4
- "best_model_checkpoint": "/content/drive/MyDrive/lora_model/outputs/task15_microsoft/Phi-4-mini-instruct/checkpoint-50",
5
- "epoch": 3.1578947368421053,
6
  "eval_steps": 1,
7
- "global_step": 60,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -608,306 +608,6 @@
608
  "eval_samples_per_second": 8.91,
609
  "eval_steps_per_second": 1.188,
610
  "step": 40
611
- },
612
- {
613
- "epoch": 2.1578947368421053,
614
- "grad_norm": 0.4683343768119812,
615
- "learning_rate": 0.00020658795558326743,
616
- "loss": 0.7401,
617
- "step": 41
618
- },
619
- {
620
- "epoch": 2.1578947368421053,
621
- "eval_loss": 0.7358477711677551,
622
- "eval_runtime": 3.3619,
623
- "eval_samples_per_second": 8.924,
624
- "eval_steps_per_second": 1.19,
625
- "step": 41
626
- },
627
- {
628
- "epoch": 2.2105263157894735,
629
- "grad_norm": 0.6029678583145142,
630
- "learning_rate": 0.0001895195261000831,
631
- "loss": 0.6829,
632
- "step": 42
633
- },
634
- {
635
- "epoch": 2.2105263157894735,
636
- "eval_loss": 0.7268175482749939,
637
- "eval_runtime": 3.3732,
638
- "eval_samples_per_second": 8.894,
639
- "eval_steps_per_second": 1.186,
640
- "step": 42
641
- },
642
- {
643
- "epoch": 2.263157894736842,
644
- "grad_norm": 0.6847506761550903,
645
- "learning_rate": 0.00017274575140626317,
646
- "loss": 0.7923,
647
- "step": 43
648
- },
649
- {
650
- "epoch": 2.263157894736842,
651
- "eval_loss": 0.7140093445777893,
652
- "eval_runtime": 3.3766,
653
- "eval_samples_per_second": 8.885,
654
- "eval_steps_per_second": 1.185,
655
- "step": 43
656
- },
657
- {
658
- "epoch": 2.3157894736842106,
659
- "grad_norm": 0.6127113699913025,
660
- "learning_rate": 0.00015634835164602198,
661
- "loss": 0.7396,
662
- "step": 44
663
- },
664
- {
665
- "epoch": 2.3157894736842106,
666
- "eval_loss": 0.6983242034912109,
667
- "eval_runtime": 3.3684,
668
- "eval_samples_per_second": 8.906,
669
- "eval_steps_per_second": 1.188,
670
- "step": 44
671
- },
672
- {
673
- "epoch": 2.3684210526315788,
674
- "grad_norm": 0.538176953792572,
675
- "learning_rate": 0.00014040721330273062,
676
- "loss": 0.6553,
677
- "step": 45
678
- },
679
- {
680
- "epoch": 2.3684210526315788,
681
- "eval_loss": 0.6850975155830383,
682
- "eval_runtime": 3.3723,
683
- "eval_samples_per_second": 8.896,
684
- "eval_steps_per_second": 1.186,
685
- "step": 45
686
- },
687
- {
688
- "epoch": 2.4210526315789473,
689
- "grad_norm": 0.6419486999511719,
690
- "learning_rate": 0.00012500000000000006,
691
- "loss": 0.7364,
692
- "step": 46
693
- },
694
- {
695
- "epoch": 2.4210526315789473,
696
- "eval_loss": 0.6766163110733032,
697
- "eval_runtime": 3.3706,
698
- "eval_samples_per_second": 8.9,
699
- "eval_steps_per_second": 1.187,
700
- "step": 46
701
- },
702
- {
703
- "epoch": 2.473684210526316,
704
- "grad_norm": 0.5997453331947327,
705
- "learning_rate": 0.00011020177413231333,
706
- "loss": 0.6901,
707
- "step": 47
708
- },
709
- {
710
- "epoch": 2.473684210526316,
711
- "eval_loss": 0.667664110660553,
712
- "eval_runtime": 3.3701,
713
- "eval_samples_per_second": 8.902,
714
- "eval_steps_per_second": 1.187,
715
- "step": 47
716
- },
717
- {
718
- "epoch": 2.526315789473684,
719
- "grad_norm": 0.5617692470550537,
720
- "learning_rate": 9.608463116858542e-05,
721
- "loss": 0.6299,
722
- "step": 48
723
- },
724
- {
725
- "epoch": 2.526315789473684,
726
- "eval_loss": 0.658656895160675,
727
- "eval_runtime": 3.3698,
728
- "eval_samples_per_second": 8.903,
729
- "eval_steps_per_second": 1.187,
730
- "step": 48
731
- },
732
- {
733
- "epoch": 2.5789473684210527,
734
- "grad_norm": 0.5850865840911865,
735
- "learning_rate": 8.271734841028553e-05,
736
- "loss": 0.717,
737
- "step": 49
738
- },
739
- {
740
- "epoch": 2.5789473684210527,
741
- "eval_loss": 0.6522302627563477,
742
- "eval_runtime": 3.3689,
743
- "eval_samples_per_second": 8.905,
744
- "eval_steps_per_second": 1.187,
745
- "step": 49
746
- },
747
- {
748
- "epoch": 2.6315789473684212,
749
- "grad_norm": 0.5645343661308289,
750
- "learning_rate": 7.016504991533726e-05,
751
- "loss": 0.6396,
752
- "step": 50
753
- },
754
- {
755
- "epoch": 2.6315789473684212,
756
- "eval_loss": 0.6460065841674805,
757
- "eval_runtime": 3.3689,
758
- "eval_samples_per_second": 8.905,
759
- "eval_steps_per_second": 1.187,
760
- "step": 50
761
- },
762
- {
763
- "epoch": 2.6842105263157894,
764
- "grad_norm": 0.6689581871032715,
765
- "learning_rate": 5.848888922025553e-05,
766
- "loss": 0.6803,
767
- "step": 51
768
- },
769
- {
770
- "epoch": 2.6842105263157894,
771
- "eval_loss": 0.6413608193397522,
772
- "eval_runtime": 3.3668,
773
- "eval_samples_per_second": 8.911,
774
- "eval_steps_per_second": 1.188,
775
- "step": 51
776
- },
777
- {
778
- "epoch": 2.736842105263158,
779
- "grad_norm": 0.5473130941390991,
780
- "learning_rate": 4.7745751406263163e-05,
781
- "loss": 0.6535,
782
- "step": 52
783
- },
784
- {
785
- "epoch": 2.736842105263158,
786
- "eval_loss": 0.6371581554412842,
787
- "eval_runtime": 3.371,
788
- "eval_samples_per_second": 8.9,
789
- "eval_steps_per_second": 1.187,
790
- "step": 52
791
- },
792
- {
793
- "epoch": 2.7894736842105265,
794
- "grad_norm": 0.6696008443832397,
795
- "learning_rate": 3.798797596089351e-05,
796
- "loss": 0.744,
797
- "step": 53
798
- },
799
- {
800
- "epoch": 2.7894736842105265,
801
- "eval_loss": 0.6349052786827087,
802
- "eval_runtime": 3.3769,
803
- "eval_samples_per_second": 8.884,
804
- "eval_steps_per_second": 1.185,
805
- "step": 53
806
- },
807
- {
808
- "epoch": 2.8421052631578947,
809
- "grad_norm": 0.5501115918159485,
810
- "learning_rate": 2.9263101785268254e-05,
811
- "loss": 0.6701,
812
- "step": 54
813
- },
814
- {
815
- "epoch": 2.8421052631578947,
816
- "eval_loss": 0.6328269839286804,
817
- "eval_runtime": 3.3789,
818
- "eval_samples_per_second": 8.879,
819
- "eval_steps_per_second": 1.184,
820
- "step": 54
821
- },
822
- {
823
- "epoch": 2.8947368421052633,
824
- "grad_norm": 0.4939638078212738,
825
- "learning_rate": 2.1613635589349755e-05,
826
- "loss": 0.6646,
827
- "step": 55
828
- },
829
- {
830
- "epoch": 2.8947368421052633,
831
- "eval_loss": 0.6300433278083801,
832
- "eval_runtime": 3.3726,
833
- "eval_samples_per_second": 8.895,
834
- "eval_steps_per_second": 1.186,
835
- "step": 55
836
- },
837
- {
838
- "epoch": 2.9473684210526314,
839
- "grad_norm": 0.61789470911026,
840
- "learning_rate": 1.5076844803522921e-05,
841
- "loss": 0.7293,
842
- "step": 56
843
- },
844
- {
845
- "epoch": 2.9473684210526314,
846
- "eval_loss": 0.6280367374420166,
847
- "eval_runtime": 3.3765,
848
- "eval_samples_per_second": 8.885,
849
- "eval_steps_per_second": 1.185,
850
- "step": 56
851
- },
852
- {
853
- "epoch": 3.0,
854
- "grad_norm": 0.5384323000907898,
855
- "learning_rate": 9.684576015420277e-06,
856
- "loss": 0.6898,
857
- "step": 57
858
- },
859
- {
860
- "epoch": 3.0,
861
- "eval_loss": 0.6272571086883545,
862
- "eval_runtime": 3.3704,
863
- "eval_samples_per_second": 8.901,
864
- "eval_steps_per_second": 1.187,
865
- "step": 57
866
- },
867
- {
868
- "epoch": 3.0526315789473686,
869
- "grad_norm": 0.5376719236373901,
870
- "learning_rate": 5.463099816548578e-06,
871
- "loss": 0.6448,
872
- "step": 58
873
- },
874
- {
875
- "epoch": 3.0526315789473686,
876
- "eval_loss": 0.6269372701644897,
877
- "eval_runtime": 3.3667,
878
- "eval_samples_per_second": 8.911,
879
- "eval_steps_per_second": 1.188,
880
- "step": 58
881
- },
882
- {
883
- "epoch": 3.1052631578947367,
884
- "grad_norm": 0.515751838684082,
885
- "learning_rate": 2.4329828146074094e-06,
886
- "loss": 0.7027,
887
- "step": 59
888
- },
889
- {
890
- "epoch": 3.1052631578947367,
891
- "eval_loss": 0.6262253522872925,
892
- "eval_runtime": 3.3654,
893
- "eval_samples_per_second": 8.914,
894
- "eval_steps_per_second": 1.189,
895
- "step": 59
896
- },
897
- {
898
- "epoch": 3.1578947368421053,
899
- "grad_norm": 0.5370931029319763,
900
- "learning_rate": 6.089874350439506e-07,
901
- "loss": 0.6594,
902
- "step": 60
903
- },
904
- {
905
- "epoch": 3.1578947368421053,
906
- "eval_loss": 0.6268424391746521,
907
- "eval_runtime": 3.3618,
908
- "eval_samples_per_second": 8.924,
909
- "eval_steps_per_second": 1.19,
910
- "step": 60
911
  }
912
  ],
913
  "logging_steps": 1,
@@ -922,12 +622,12 @@
922
  "should_evaluate": false,
923
  "should_log": false,
924
  "should_save": true,
925
- "should_training_stop": true
926
  },
927
  "attributes": {}
928
  }
929
  },
930
- "total_flos": 2488197563258880.0,
931
  "train_batch_size": 1,
932
  "trial_name": null,
933
  "trial_params": null
 
1
  {
2
+ "best_global_step": 40,
3
+ "best_metric": 0.7394412159919739,
4
+ "best_model_checkpoint": "/content/drive/MyDrive/lora_model/outputs/task15_microsoft/Phi-4-mini-instruct/checkpoint-40",
5
+ "epoch": 2.1052631578947367,
6
  "eval_steps": 1,
7
+ "global_step": 40,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
608
  "eval_samples_per_second": 8.91,
609
  "eval_steps_per_second": 1.188,
610
  "step": 40
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
611
  }
612
  ],
613
  "logging_steps": 1,
 
622
  "should_evaluate": false,
623
  "should_log": false,
624
  "should_save": true,
625
+ "should_training_stop": false
626
  },
627
  "attributes": {}
628
  }
629
  },
630
+ "total_flos": 1661495727175680.0,
631
  "train_batch_size": 1,
632
  "trial_name": null,
633
  "trial_params": null