Token Classification
Safetensors
English
deberta-v2
shawnrushefsky committed on
Commit
9e35bf4
·
1 Parent(s): d18158b

checkpoint

Browse files
{checkpoint-6176 → checkpoint-7334}/added_tokens.json RENAMED
File without changes
{checkpoint-6176 → checkpoint-7334}/config.json RENAMED
File without changes
{checkpoint-6176 → checkpoint-7334}/special_tokens_map.json RENAMED
File without changes
{checkpoint-6176 → checkpoint-7334}/tokenizer.json RENAMED
File without changes
{checkpoint-6176 → checkpoint-7334}/tokenizer_config.json RENAMED
File without changes
{checkpoint-6176 → checkpoint-7334}/trainer_state.json RENAMED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.8019737696403064,
6
  "eval_steps": 500,
7
- "global_step": 6176,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -869,6 +869,167 @@
869
  "learning_rate": 1.702723325878174e-05,
870
  "loss": 0.1903,
871
  "step": 6150
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
872
  }
873
  ],
874
  "logging_steps": 50,
@@ -888,7 +1049,7 @@
888
  "attributes": {}
889
  }
890
  },
891
- "total_flos": 5.1647602330330726e+17,
892
  "train_batch_size": 40,
893
  "trial_name": null,
894
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.9523438514478639,
6
  "eval_steps": 500,
7
+ "global_step": 7334,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
869
  "learning_rate": 1.702723325878174e-05,
870
  "loss": 0.1903,
871
  "step": 6150
872
+ },
873
+ {
874
+ "epoch": 0.8050902480197377,
875
+ "grad_norm": 0.5991400480270386,
876
+ "learning_rate": 1.700092093145639e-05,
877
+ "loss": 0.197,
878
+ "step": 6200
879
+ },
880
+ {
881
+ "epoch": 0.8115829113102194,
882
+ "grad_norm": 0.9522245526313782,
883
+ "learning_rate": 1.6974608604131037e-05,
884
+ "loss": 0.1962,
885
+ "step": 6250
886
+ },
887
+ {
888
+ "epoch": 0.8180755746007012,
889
+ "grad_norm": 0.8645381927490234,
890
+ "learning_rate": 1.6948296276805686e-05,
891
+ "loss": 0.1901,
892
+ "step": 6300
893
+ },
894
+ {
895
+ "epoch": 0.8245682378911829,
896
+ "grad_norm": 0.5243034958839417,
897
+ "learning_rate": 1.6921983949480334e-05,
898
+ "loss": 0.194,
899
+ "step": 6350
900
+ },
901
+ {
902
+ "epoch": 0.8310609011816648,
903
+ "grad_norm": 0.5842151641845703,
904
+ "learning_rate": 1.6895671622154982e-05,
905
+ "loss": 0.1917,
906
+ "step": 6400
907
+ },
908
+ {
909
+ "epoch": 0.8375535644721465,
910
+ "grad_norm": 0.6111485362052917,
911
+ "learning_rate": 1.6869359294829627e-05,
912
+ "loss": 0.2042,
913
+ "step": 6450
914
+ },
915
+ {
916
+ "epoch": 0.8440462277626283,
917
+ "grad_norm": 0.6515288949012756,
918
+ "learning_rate": 1.684304696750428e-05,
919
+ "loss": 0.2032,
920
+ "step": 6500
921
+ },
922
+ {
923
+ "epoch": 0.85053889105311,
924
+ "grad_norm": 0.7596396207809448,
925
+ "learning_rate": 1.6816734640178924e-05,
926
+ "loss": 0.193,
927
+ "step": 6550
928
+ },
929
+ {
930
+ "epoch": 0.8570315543435918,
931
+ "grad_norm": 0.6944254636764526,
932
+ "learning_rate": 1.6790422312853575e-05,
933
+ "loss": 0.1899,
934
+ "step": 6600
935
+ },
936
+ {
937
+ "epoch": 0.8635242176340735,
938
+ "grad_norm": 0.6190508604049683,
939
+ "learning_rate": 1.676410998552822e-05,
940
+ "loss": 0.1987,
941
+ "step": 6650
942
+ },
943
+ {
944
+ "epoch": 0.8700168809245552,
945
+ "grad_norm": 1.1515477895736694,
946
+ "learning_rate": 1.673779765820287e-05,
947
+ "loss": 0.196,
948
+ "step": 6700
949
+ },
950
+ {
951
+ "epoch": 0.876509544215037,
952
+ "grad_norm": 0.5803254842758179,
953
+ "learning_rate": 1.6711485330877517e-05,
954
+ "loss": 0.1923,
955
+ "step": 6750
956
+ },
957
+ {
958
+ "epoch": 0.8830022075055187,
959
+ "grad_norm": 0.8052871227264404,
960
+ "learning_rate": 1.6685173003552165e-05,
961
+ "loss": 0.1894,
962
+ "step": 6800
963
+ },
964
+ {
965
+ "epoch": 0.8894948707960005,
966
+ "grad_norm": 0.9313941597938538,
967
+ "learning_rate": 1.6658860676226813e-05,
968
+ "loss": 0.1889,
969
+ "step": 6850
970
+ },
971
+ {
972
+ "epoch": 0.8959875340864822,
973
+ "grad_norm": 0.5186671614646912,
974
+ "learning_rate": 1.663254834890146e-05,
975
+ "loss": 0.1895,
976
+ "step": 6900
977
+ },
978
+ {
979
+ "epoch": 0.902480197376964,
980
+ "grad_norm": 0.7533177137374878,
981
+ "learning_rate": 1.660623602157611e-05,
982
+ "loss": 0.1956,
983
+ "step": 6950
984
+ },
985
+ {
986
+ "epoch": 0.9089728606674458,
987
+ "grad_norm": 0.7142027020454407,
988
+ "learning_rate": 1.6579923694250758e-05,
989
+ "loss": 0.1921,
990
+ "step": 7000
991
+ },
992
+ {
993
+ "epoch": 0.9154655239579276,
994
+ "grad_norm": 1.0748203992843628,
995
+ "learning_rate": 1.6553611366925407e-05,
996
+ "loss": 0.1847,
997
+ "step": 7050
998
+ },
999
+ {
1000
+ "epoch": 0.9219581872484093,
1001
+ "grad_norm": 0.5605922341346741,
1002
+ "learning_rate": 1.6527299039600055e-05,
1003
+ "loss": 0.1887,
1004
+ "step": 7100
1005
+ },
1006
+ {
1007
+ "epoch": 0.9284508505388911,
1008
+ "grad_norm": 0.5474116802215576,
1009
+ "learning_rate": 1.6500986712274703e-05,
1010
+ "loss": 0.1948,
1011
+ "step": 7150
1012
+ },
1013
+ {
1014
+ "epoch": 0.9349435138293728,
1015
+ "grad_norm": 0.9507768154144287,
1016
+ "learning_rate": 1.6474674384949348e-05,
1017
+ "loss": 0.2126,
1018
+ "step": 7200
1019
+ },
1020
+ {
1021
+ "epoch": 0.9414361771198546,
1022
+ "grad_norm": 0.694097101688385,
1023
+ "learning_rate": 1.6448362057624e-05,
1024
+ "loss": 0.1904,
1025
+ "step": 7250
1026
+ },
1027
+ {
1028
+ "epoch": 0.9479288404103363,
1029
+ "grad_norm": 0.6153343915939331,
1030
+ "learning_rate": 1.6422049730298645e-05,
1031
+ "loss": 0.1921,
1032
+ "step": 7300
1033
  }
1034
  ],
1035
  "logging_steps": 50,
 
1049
  "attributes": {}
1050
  }
1051
  },
1052
+ "total_flos": 6.133152775887913e+17,
1053
  "train_batch_size": 40,
1054
  "trial_name": null,
1055
  "trial_params": null