robertou2 commited on
Commit
8998d48
·
verified ·
1 Parent(s): 507f518

Upload folder using huggingface_hub

Browse files
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:89daabbba6e8fbb223ea2dadd6cafc0473e7831b6a8a31965fd25a932502f63a
3
  size 369133600
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3161508c0505b1add3d5708f8dcbe8451ca9f16d9c4da4eb15f72f0213ea5ab3
3
  size 369133600
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:53b455474ae0967c9dc13e4a970734e024e902b0e0593a5ebcb87d06c41d8d84
3
  size 738413771
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f2de0050503da4c1dfedc106d96c40be2c5e711da4076acf99032873e9af7004
3
  size 738413771
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:532826bfd6ab0d9c120628c9ba8dbb5c027e661038baccc2d23e0946927a6e4b
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e961bd000aad0805f4df299e9e54c009416c83b5abff201439df38b2efb708fc
3
  size 14645
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2692bc6d8e85cffcbf5a9395fe1431563b67e6de43846a9570babfa256843214
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3ba3ad0939c5c784a2980c55ea2ede819b84a24d205e18f93e06504838daa120
3
  size 1465
trainer_state.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
- "best_global_step": 50,
3
- "best_metric": 0.6460065841674805,
4
  "best_model_checkpoint": "/content/drive/MyDrive/lora_model/outputs/task15_microsoft/Phi-4-mini-instruct/checkpoint-50",
5
- "epoch": 2.6315789473684212,
6
  "eval_steps": 1,
7
- "global_step": 50,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -758,6 +758,156 @@
758
  "eval_samples_per_second": 8.905,
759
  "eval_steps_per_second": 1.187,
760
  "step": 50
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
761
  }
762
  ],
763
  "logging_steps": 1,
@@ -772,12 +922,12 @@
772
  "should_evaluate": false,
773
  "should_log": false,
774
  "should_save": true,
775
- "should_training_stop": false
776
  },
777
  "attributes": {}
778
  }
779
  },
780
- "total_flos": 2071824550963200.0,
781
  "train_batch_size": 1,
782
  "trial_name": null,
783
  "trial_params": null
 
1
  {
2
+ "best_global_step": 59,
3
+ "best_metric": 0.6262253522872925,
4
  "best_model_checkpoint": "/content/drive/MyDrive/lora_model/outputs/task15_microsoft/Phi-4-mini-instruct/checkpoint-50",
5
+ "epoch": 3.1578947368421053,
6
  "eval_steps": 1,
7
+ "global_step": 60,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
758
  "eval_samples_per_second": 8.905,
759
  "eval_steps_per_second": 1.187,
760
  "step": 50
761
+ },
762
+ {
763
+ "epoch": 2.6842105263157894,
764
+ "grad_norm": 0.6689581871032715,
765
+ "learning_rate": 5.848888922025553e-05,
766
+ "loss": 0.6803,
767
+ "step": 51
768
+ },
769
+ {
770
+ "epoch": 2.6842105263157894,
771
+ "eval_loss": 0.6413608193397522,
772
+ "eval_runtime": 3.3668,
773
+ "eval_samples_per_second": 8.911,
774
+ "eval_steps_per_second": 1.188,
775
+ "step": 51
776
+ },
777
+ {
778
+ "epoch": 2.736842105263158,
779
+ "grad_norm": 0.5473130941390991,
780
+ "learning_rate": 4.7745751406263163e-05,
781
+ "loss": 0.6535,
782
+ "step": 52
783
+ },
784
+ {
785
+ "epoch": 2.736842105263158,
786
+ "eval_loss": 0.6371581554412842,
787
+ "eval_runtime": 3.371,
788
+ "eval_samples_per_second": 8.9,
789
+ "eval_steps_per_second": 1.187,
790
+ "step": 52
791
+ },
792
+ {
793
+ "epoch": 2.7894736842105265,
794
+ "grad_norm": 0.6696008443832397,
795
+ "learning_rate": 3.798797596089351e-05,
796
+ "loss": 0.744,
797
+ "step": 53
798
+ },
799
+ {
800
+ "epoch": 2.7894736842105265,
801
+ "eval_loss": 0.6349052786827087,
802
+ "eval_runtime": 3.3769,
803
+ "eval_samples_per_second": 8.884,
804
+ "eval_steps_per_second": 1.185,
805
+ "step": 53
806
+ },
807
+ {
808
+ "epoch": 2.8421052631578947,
809
+ "grad_norm": 0.5501115918159485,
810
+ "learning_rate": 2.9263101785268254e-05,
811
+ "loss": 0.6701,
812
+ "step": 54
813
+ },
814
+ {
815
+ "epoch": 2.8421052631578947,
816
+ "eval_loss": 0.6328269839286804,
817
+ "eval_runtime": 3.3789,
818
+ "eval_samples_per_second": 8.879,
819
+ "eval_steps_per_second": 1.184,
820
+ "step": 54
821
+ },
822
+ {
823
+ "epoch": 2.8947368421052633,
824
+ "grad_norm": 0.4939638078212738,
825
+ "learning_rate": 2.1613635589349755e-05,
826
+ "loss": 0.6646,
827
+ "step": 55
828
+ },
829
+ {
830
+ "epoch": 2.8947368421052633,
831
+ "eval_loss": 0.6300433278083801,
832
+ "eval_runtime": 3.3726,
833
+ "eval_samples_per_second": 8.895,
834
+ "eval_steps_per_second": 1.186,
835
+ "step": 55
836
+ },
837
+ {
838
+ "epoch": 2.9473684210526314,
839
+ "grad_norm": 0.61789470911026,
840
+ "learning_rate": 1.5076844803522921e-05,
841
+ "loss": 0.7293,
842
+ "step": 56
843
+ },
844
+ {
845
+ "epoch": 2.9473684210526314,
846
+ "eval_loss": 0.6280367374420166,
847
+ "eval_runtime": 3.3765,
848
+ "eval_samples_per_second": 8.885,
849
+ "eval_steps_per_second": 1.185,
850
+ "step": 56
851
+ },
852
+ {
853
+ "epoch": 3.0,
854
+ "grad_norm": 0.5384323000907898,
855
+ "learning_rate": 9.684576015420277e-06,
856
+ "loss": 0.6898,
857
+ "step": 57
858
+ },
859
+ {
860
+ "epoch": 3.0,
861
+ "eval_loss": 0.6272571086883545,
862
+ "eval_runtime": 3.3704,
863
+ "eval_samples_per_second": 8.901,
864
+ "eval_steps_per_second": 1.187,
865
+ "step": 57
866
+ },
867
+ {
868
+ "epoch": 3.0526315789473686,
869
+ "grad_norm": 0.5376719236373901,
870
+ "learning_rate": 5.463099816548578e-06,
871
+ "loss": 0.6448,
872
+ "step": 58
873
+ },
874
+ {
875
+ "epoch": 3.0526315789473686,
876
+ "eval_loss": 0.6269372701644897,
877
+ "eval_runtime": 3.3667,
878
+ "eval_samples_per_second": 8.911,
879
+ "eval_steps_per_second": 1.188,
880
+ "step": 58
881
+ },
882
+ {
883
+ "epoch": 3.1052631578947367,
884
+ "grad_norm": 0.515751838684082,
885
+ "learning_rate": 2.4329828146074094e-06,
886
+ "loss": 0.7027,
887
+ "step": 59
888
+ },
889
+ {
890
+ "epoch": 3.1052631578947367,
891
+ "eval_loss": 0.6262253522872925,
892
+ "eval_runtime": 3.3654,
893
+ "eval_samples_per_second": 8.914,
894
+ "eval_steps_per_second": 1.189,
895
+ "step": 59
896
+ },
897
+ {
898
+ "epoch": 3.1578947368421053,
899
+ "grad_norm": 0.5370931029319763,
900
+ "learning_rate": 6.089874350439506e-07,
901
+ "loss": 0.6594,
902
+ "step": 60
903
+ },
904
+ {
905
+ "epoch": 3.1578947368421053,
906
+ "eval_loss": 0.6268424391746521,
907
+ "eval_runtime": 3.3618,
908
+ "eval_samples_per_second": 8.924,
909
+ "eval_steps_per_second": 1.19,
910
+ "step": 60
911
  }
912
  ],
913
  "logging_steps": 1,
 
922
  "should_evaluate": false,
923
  "should_log": false,
924
  "should_save": true,
925
+ "should_training_stop": true
926
  },
927
  "attributes": {}
928
  }
929
  },
930
+ "total_flos": 2488197563258880.0,
931
  "train_batch_size": 1,
932
  "trial_name": null,
933
  "trial_params": null