MoneerProject committed on
Commit
d790dfb
·
verified ·
1 Parent(s): d70462e

Training in progress, step 3500, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9cb935ab771f33e20a7b505537eb78c6d9f0917f5a90dac3d0266df181fcefd2
3
  size 966995080
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:211c3e081cd4d45139c1b37769c82a39d49594bd6cb32f70c0f4dfa85757fa43
3
  size 966995080
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:be80de71962a2bed0e9e9eceb9370db35d5115beb28a844300a0d8836493df5a
3
  size 1228890682
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0c6b9f0ce0751785007dca8a03a72091fb50b8a1cf30251390dddfa7806b03fb
3
  size 1228890682
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b7d89181cfd314a69a1aa1b9ba842a99dedf4dca5b395632f38021ef8424361f
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8e3d8253d03fcee7ed2b170bbb1632a1a5520714c44e4406d8dab09e403df5b0
3
  size 14244
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7b5cc1ed35f48cc59384f56c8de34e461d08b0f64957aa14f6f8878a25fc4550
3
  size 988
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2daaa0057b288ba4d4f56331182174ee76136e14690065d8b128321e93809fdf
3
  size 988
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c7d6af335768dfd8ff7047730bbedefbd3d6088c1613499f72ef42fc1e0161ba
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c382cdf70b3eb758a0dda9a8be2ad82136bc5362f13a6d2d16779614d18daa02
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
- "best_global_step": 3000,
3
- "best_metric": 44.32302984611291,
4
- "best_model_checkpoint": "./whisper-small-warsh-quran-v3/checkpoint-3000",
5
- "epoch": 0.6288648988575621,
6
  "eval_steps": 500,
7
- "global_step": 3000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -902,6 +902,155 @@
902
  "eval_steps_per_second": 0.333,
903
  "eval_wer": 44.32302984611291,
904
  "step": 3000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
905
  }
906
  ],
907
  "logging_steps": 25,
@@ -921,7 +1070,7 @@
921
  "attributes": {}
922
  }
923
  },
924
- "total_flos": 6.92604960768e+18,
925
  "train_batch_size": 4,
926
  "trial_name": null,
927
  "trial_params": null
 
1
  {
2
+ "best_global_step": 3500,
3
+ "best_metric": 40.41509601312697,
4
+ "best_model_checkpoint": "./whisper-small-warsh-quran-v3/checkpoint-3500",
5
+ "epoch": 0.7336757153338225,
6
  "eval_steps": 500,
7
+ "global_step": 3500,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
902
  "eval_steps_per_second": 0.333,
903
  "eval_wer": 44.32302984611291,
904
  "step": 3000
905
+ },
906
+ {
907
+ "epoch": 0.6341054396813751,
908
+ "grad_norm": 3.61225962638855,
909
+ "learning_rate": 5.180869565217391e-06,
910
+ "loss": 0.2671,
911
+ "step": 3025
912
+ },
913
+ {
914
+ "epoch": 0.6393459805051881,
915
+ "grad_norm": 6.848639011383057,
916
+ "learning_rate": 5.137391304347826e-06,
917
+ "loss": 0.2305,
918
+ "step": 3050
919
+ },
920
+ {
921
+ "epoch": 0.6445865213290012,
922
+ "grad_norm": 5.066479206085205,
923
+ "learning_rate": 5.0939130434782615e-06,
924
+ "loss": 0.2075,
925
+ "step": 3075
926
+ },
927
+ {
928
+ "epoch": 0.6498270621528142,
929
+ "grad_norm": 10.204203605651855,
930
+ "learning_rate": 5.050434782608697e-06,
931
+ "loss": 0.2618,
932
+ "step": 3100
933
+ },
934
+ {
935
+ "epoch": 0.6550676029766271,
936
+ "grad_norm": 4.369320869445801,
937
+ "learning_rate": 5.006956521739132e-06,
938
+ "loss": 0.3662,
939
+ "step": 3125
940
+ },
941
+ {
942
+ "epoch": 0.6603081438004402,
943
+ "grad_norm": 5.38928747177124,
944
+ "learning_rate": 4.963478260869566e-06,
945
+ "loss": 0.3384,
946
+ "step": 3150
947
+ },
948
+ {
949
+ "epoch": 0.6655486846242532,
950
+ "grad_norm": 13.416193962097168,
951
+ "learning_rate": 4.92e-06,
952
+ "loss": 0.3091,
953
+ "step": 3175
954
+ },
955
+ {
956
+ "epoch": 0.6707892254480663,
957
+ "grad_norm": 3.561743974685669,
958
+ "learning_rate": 4.8765217391304355e-06,
959
+ "loss": 0.2343,
960
+ "step": 3200
961
+ },
962
+ {
963
+ "epoch": 0.6760297662718793,
964
+ "grad_norm": 15.610285758972168,
965
+ "learning_rate": 4.83304347826087e-06,
966
+ "loss": 0.356,
967
+ "step": 3225
968
+ },
969
+ {
970
+ "epoch": 0.6812703070956923,
971
+ "grad_norm": 7.896173477172852,
972
+ "learning_rate": 4.789565217391305e-06,
973
+ "loss": 0.2121,
974
+ "step": 3250
975
+ },
976
+ {
977
+ "epoch": 0.6865108479195053,
978
+ "grad_norm": 6.182321548461914,
979
+ "learning_rate": 4.746086956521739e-06,
980
+ "loss": 0.2922,
981
+ "step": 3275
982
+ },
983
+ {
984
+ "epoch": 0.6917513887433183,
985
+ "grad_norm": 14.184751510620117,
986
+ "learning_rate": 4.702608695652174e-06,
987
+ "loss": 0.3107,
988
+ "step": 3300
989
+ },
990
+ {
991
+ "epoch": 0.6969919295671313,
992
+ "grad_norm": 3.7635350227355957,
993
+ "learning_rate": 4.6591304347826085e-06,
994
+ "loss": 0.2339,
995
+ "step": 3325
996
+ },
997
+ {
998
+ "epoch": 0.7022324703909444,
999
+ "grad_norm": 4.777770519256592,
1000
+ "learning_rate": 4.615652173913044e-06,
1001
+ "loss": 0.2221,
1002
+ "step": 3350
1003
+ },
1004
+ {
1005
+ "epoch": 0.7074730112147574,
1006
+ "grad_norm": 6.8489556312561035,
1007
+ "learning_rate": 4.572173913043479e-06,
1008
+ "loss": 0.3584,
1009
+ "step": 3375
1010
+ },
1011
+ {
1012
+ "epoch": 0.7127135520385703,
1013
+ "grad_norm": 8.557856559753418,
1014
+ "learning_rate": 4.528695652173913e-06,
1015
+ "loss": 0.3055,
1016
+ "step": 3400
1017
+ },
1018
+ {
1019
+ "epoch": 0.7179540928623834,
1020
+ "grad_norm": 6.584381580352783,
1021
+ "learning_rate": 4.485217391304348e-06,
1022
+ "loss": 0.2357,
1023
+ "step": 3425
1024
+ },
1025
+ {
1026
+ "epoch": 0.7231946336861964,
1027
+ "grad_norm": 4.871785640716553,
1028
+ "learning_rate": 4.4417391304347825e-06,
1029
+ "loss": 0.2408,
1030
+ "step": 3450
1031
+ },
1032
+ {
1033
+ "epoch": 0.7284351745100094,
1034
+ "grad_norm": 8.363597869873047,
1035
+ "learning_rate": 4.398260869565218e-06,
1036
+ "loss": 0.2942,
1037
+ "step": 3475
1038
+ },
1039
+ {
1040
+ "epoch": 0.7336757153338225,
1041
+ "grad_norm": 8.755147933959961,
1042
+ "learning_rate": 4.354782608695653e-06,
1043
+ "loss": 0.2587,
1044
+ "step": 3500
1045
+ },
1046
+ {
1047
+ "epoch": 0.7336757153338225,
1048
+ "eval_loss": 0.271077036857605,
1049
+ "eval_runtime": 7162.8947,
1050
+ "eval_samples_per_second": 1.332,
1051
+ "eval_steps_per_second": 0.333,
1052
+ "eval_wer": 40.41509601312697,
1053
+ "step": 3500
1054
  }
1055
  ],
1056
  "logging_steps": 25,
 
1070
  "attributes": {}
1071
  }
1072
  },
1073
+ "total_flos": 8.08039120896e+18,
1074
  "train_batch_size": 4,
1075
  "trial_name": null,
1076
  "trial_params": null