spsither commited on
Commit
df4832b
·
1 Parent(s): fceb283

Upload processor

Browse files
Files changed (4) hide show
  1. added_tokens.json +2 -2
  2. special_tokens_map.json +16 -0
  3. tokenizer_config.json +2 -0
  4. vocab.json +71 -106
added_tokens.json CHANGED
@@ -1,4 +1,4 @@
1
  {
2
- "</s>": 109,
3
- "<s>": 108
4
  }
 
1
  {
2
+ "</s>": 74,
3
+ "<s>": 73
4
  }
special_tokens_map.json CHANGED
@@ -1,4 +1,20 @@
1
  {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  "bos_token": "<s>",
3
  "eos_token": "</s>",
4
  "pad_token": "[PAD]",
 
1
  {
2
+ "additional_special_tokens": [
3
+ {
4
+ "content": "<s>",
5
+ "lstrip": false,
6
+ "normalized": true,
7
+ "rstrip": false,
8
+ "single_word": false
9
+ },
10
+ {
11
+ "content": "</s>",
12
+ "lstrip": false,
13
+ "normalized": true,
14
+ "rstrip": false,
15
+ "single_word": false
16
+ }
17
+ ],
18
  "bos_token": "<s>",
19
  "eos_token": "</s>",
20
  "pad_token": "[PAD]",
tokenizer_config.json CHANGED
@@ -2,9 +2,11 @@
2
  "bos_token": "<s>",
3
  "do_lower_case": false,
4
  "eos_token": "</s>",
 
5
  "pad_token": "[PAD]",
6
  "processor_class": "Wav2Vec2Processor",
7
  "replace_word_delimiter_char": " ",
 
8
  "tokenizer_class": "Wav2Vec2CTCTokenizer",
9
  "unk_token": "[UNK]",
10
  "word_delimiter_token": "|"
 
2
  "bos_token": "<s>",
3
  "do_lower_case": false,
4
  "eos_token": "</s>",
5
+ "name_or_path": "wav2vec2_run1",
6
  "pad_token": "[PAD]",
7
  "processor_class": "Wav2Vec2Processor",
8
  "replace_word_delimiter_char": " ",
9
+ "special_tokens_map_file": "wav2vec2_run1/special_tokens_map.json",
10
  "tokenizer_class": "Wav2Vec2CTCTokenizer",
11
  "unk_token": "[UNK]",
12
  "word_delimiter_token": "|"
vocab.json CHANGED
@@ -1,110 +1,75 @@
1
  {
2
- "'": 76,
3
- "[PAD]": 107,
4
- "[UNK]": 106,
5
- "|": 71,
6
- "": 77,
7
- "": 51,
8
- "": 81,
9
- "": 61,
10
- "": 32,
11
- "": 62,
12
- "": 95,
13
- "": 86,
14
- "": 41,
15
- "": 14,
16
- "": 85,
17
- "": 83,
18
- "": 80,
19
- "": 4,
20
- "": 34,
21
- "": 11,
22
- "": 20,
23
- "": 38,
24
- "": 22,
25
- "": 8,
26
- "": 23,
27
- "": 78,
28
- "": 63,
29
- "": 44,
30
- "": 69,
31
- "": 3,
32
- "": 1,
33
- "": 91,
34
- "": 16,
35
- "": 52,
36
- "": 94,
37
- "": 70,
38
- "": 102,
39
- "": 89,
40
- "": 21,
41
- "": 101,
42
- "": 64,
43
- "": 88,
44
- "": 47,
45
- "": 55,
46
- "": 66,
47
- "": 92,
48
- "": 6,
49
- "": 40,
50
- "": 39,
51
- "": 24,
52
- "": 25,
53
- "": 96,
54
- "": 5,
55
- "": 103,
56
- "": 53,
57
- "ཨ": 7,
58
- "ཀྵ": 12,
59
- "ཪ": 9,
60
- "ཱ": 31,
61
- "ི": 43,
62
- "ཱི": 82,
63
- "ུ": 27,
64
- "ཱུ": 19,
65
- "ེ": 59,
66
- "ཻ": 58,
67
- "ོ": 73,
68
- "ཽ": 50,
69
- "ཾ": 60,
70
- "ཿ": 68,
71
- "ྀ": 93,
72
- "ྂ": 97,
73
- "ྃ": 2,
74
- "྄": 36,
75
- "྅": 105,
76
- "ྐ": 30,
77
- "ྑ": 46,
78
- "ྒ": 26,
79
- "ྔ": 17,
80
  "ྕ": 37,
81
- "ྖ": 104,
82
  "ྗ": 28,
83
- "ྙ": 45,
84
- "": 67,
85
- "": 13,
86
- "": 65,
87
- "": 74,
88
- "": 84,
89
- "": 56,
90
- "": 29,
91
- "": 98,
92
- "": 79,
93
- "": 0,
94
- "": 49,
95
- "": 90,
96
- "": 18,
97
- "": 35,
98
- "": 72,
99
- "ྪ": 87,
100
- "ྫ": 15,
101
- "ྭ": 99,
102
- "ྰ": 48,
103
- "ྱ": 57,
104
- "ྲ": 54,
105
- "ླ": 42,
106
- "ྴ": 75,
107
- "ྵ": 100,
108
- "ྶ": 10,
109
- "ྷ": 33
110
  }
 
1
  {
2
+ "[PAD]": 72,
3
+ "[UNK]": 71,
4
+ "|": 61,
5
+ "": 39,
6
+ "": 10,
7
+ "": 25,
8
+ "": 43,
9
+ "": 22,
10
+ "": 21,
11
+ "": 50,
12
+ "": 56,
13
+ "": 64,
14
+ "": 0,
15
+ "": 7,
16
+ "": 70,
17
+ "": 12,
18
+ "": 36,
19
+ "": 49,
20
+ "": 35,
21
+ "": 63,
22
+ "": 8,
23
+ "": 30,
24
+ "": 48,
25
+ "": 26,
26
+ "": 57,
27
+ "": 31,
28
+ "": 42,
29
+ "": 20,
30
+ "": 69,
31
+ "": 41,
32
+ "": 6,
33
+ "": 53,
34
+ "": 45,
35
+ "": 1,
36
+ "": 33,
37
+ "": 51,
38
+ "": 4,
39
+ "": 46,
40
+ "": 40,
41
+ "": 13,
42
+ "": 58,
43
+ "": 38,
44
+ "": 9,
45
+ "": 11,
46
+ "": 15,
47
+ "": 55,
48
+ "": 59,
49
+ "": 2,
50
+ "": 5,
51
+ "": 65,
52
+ "": 62,
53
+ "": 29,
54
+ "": 17,
55
+ "": 18,
56
+ "": 16,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57
  "ྕ": 37,
 
58
  "ྗ": 28,
59
+ "ྙ": 19,
60
+ "": 54,
61
+ "": 66,
62
+ "": 67,
63
+ "": 60,
64
+ "": 47,
65
+ "": 14,
66
+ "": 23,
67
+ "": 34,
68
+ "": 44,
69
+ "": 27,
70
+ "": 24,
71
+ "": 68,
72
+ "": 3,
73
+ "": 52,
74
+ "": 32
 
 
 
 
 
 
 
 
 
 
 
75
  }