Safetensors
wav2vec2-bert
indiejoseph committed on
Commit
4e20d93
·
verified ·
1 Parent(s): 54a838c

Upload SpeechToJyutpingPipeline

Browse files
Files changed (3) hide show
  1. config.json +9 -9
  2. special_tokens_map.json +28 -4
  3. tokenizer_config.json +2 -1
config.json CHANGED
@@ -1,13 +1,5 @@
1
  {
2
  "_name_or_path": "hon9kon9ize/wav2vec2bert-jyutping",
3
- "custom_pipelines": {
4
- "speech-to-jyutping": {
5
- "impl": "pipeline.SpeechToJyutpingPipeline",
6
- "pt": [
7
- "model.Wav2Vec2BertForCantonese"
8
- ]
9
- }
10
- },
11
  "activation_dropout": 0.0,
12
  "adapter_act": "relu",
13
  "adapter_kernel_size": 3,
@@ -26,6 +18,14 @@
26
  "conv_depthwise_kernel_size": 31,
27
  "ctc_loss_reduction": "mean",
28
  "ctc_zero_infinity": false,
 
 
 
 
 
 
 
 
29
  "diversity_loss_weight": 0.1,
30
  "eos_token_id": 2,
31
  "feat_proj_dropout": 0.0,
@@ -83,7 +83,7 @@
83
  ],
84
  "tone_vocab_size": 11,
85
  "torch_dtype": "float32",
86
- "transformers_version": "4.46.1",
87
  "use_intermediate_ffn_before_adapter": false,
88
  "use_weighted_layer_sum": false,
89
  "vocab_size": 77,
 
1
  {
2
  "_name_or_path": "hon9kon9ize/wav2vec2bert-jyutping",
 
 
 
 
 
 
 
 
3
  "activation_dropout": 0.0,
4
  "adapter_act": "relu",
5
  "adapter_kernel_size": 3,
 
18
  "conv_depthwise_kernel_size": 31,
19
  "ctc_loss_reduction": "mean",
20
  "ctc_zero_infinity": false,
21
+ "custom_pipelines": {
22
+ "speech-to-jyutping": {
23
+ "impl": "hon9kon9ize/wav2vec2bert-jyutping--pipeline.SpeechToJyutpingPipeline",
24
+ "pt": [
25
+ "model.Wav2Vec2BertForCantonese"
26
+ ]
27
+ }
28
+ },
29
  "diversity_loss_weight": 0.1,
30
  "eos_token_id": 2,
31
  "feat_proj_dropout": 0.0,
 
83
  ],
84
  "tone_vocab_size": 11,
85
  "torch_dtype": "float32",
86
+ "transformers_version": "4.49.0",
87
  "use_intermediate_ffn_before_adapter": false,
88
  "use_weighted_layer_sum": false,
89
  "vocab_size": 77,
special_tokens_map.json CHANGED
@@ -1,6 +1,30 @@
1
  {
2
- "bos_token": "<s>",
3
- "eos_token": "</s>",
4
- "pad_token": "[PAD]",
5
- "unk_token": "[UNK]"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
  }
 
1
  {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "</s>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "[PAD]",
18
+ "lstrip": true,
19
+ "normalized": false,
20
+ "rstrip": true,
21
+ "single_word": false
22
+ },
23
+ "unk_token": {
24
+ "content": "[UNK]",
25
+ "lstrip": true,
26
+ "normalized": false,
27
+ "rstrip": true,
28
+ "single_word": false
29
+ }
30
  }
tokenizer_config.json CHANGED
@@ -621,6 +621,7 @@
621
  "clean_up_tokenization_spaces": false,
622
  "do_lower_case": false,
623
  "eos_token": "</s>",
 
624
  "model_max_length": 1000000000000000019884624838656,
625
  "pad_token": "[PAD]",
626
  "processor_class": "Wav2Vec2BertProcessor",
@@ -629,4 +630,4 @@
629
  "tokenizer_class": "Wav2Vec2CTCTokenizer",
630
  "unk_token": "[UNK]",
631
  "word_delimiter_token": "|"
632
- }
 
621
  "clean_up_tokenization_spaces": false,
622
  "do_lower_case": false,
623
  "eos_token": "</s>",
624
+ "extra_special_tokens": {},
625
  "model_max_length": 1000000000000000019884624838656,
626
  "pad_token": "[PAD]",
627
  "processor_class": "Wav2Vec2BertProcessor",
 
630
  "tokenizer_class": "Wav2Vec2CTCTokenizer",
631
  "unk_token": "[UNK]",
632
  "word_delimiter_token": "|"
633
+ }