rbcurzon committed on
Commit
34f235d
·
verified ·
1 Parent(s): 6ca3aee

Upload tokenizer

Browse files
added_tokens.json CHANGED
@@ -1,3 +1,3 @@
1
  {
2
- "<unk>": 31
3
  }
 
1
  {
2
+ "<unk>": 41
3
  }
special_tokens_map.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "pad_token": {
3
- "content": "a",
4
  "lstrip": false,
5
  "normalized": false,
6
  "rstrip": false,
 
1
  {
2
  "pad_token": {
3
+ "content": "0",
4
  "lstrip": false,
5
  "normalized": false,
6
  "rstrip": false,
tokenizer_config.json CHANGED
@@ -2,14 +2,14 @@
2
  "add_blank": true,
3
  "added_tokens_decoder": {
4
  "0": {
5
- "content": "a",
6
  "lstrip": false,
7
  "normalized": false,
8
  "rstrip": false,
9
  "single_word": false,
10
  "special": true
11
  },
12
- "31": {
13
  "content": "<unk>",
14
  "lstrip": false,
15
  "normalized": false,
@@ -21,10 +21,10 @@
21
  "clean_up_tokenization_spaces": true,
22
  "extra_special_tokens": {},
23
  "is_uroman": false,
24
- "language": "bcl",
25
  "model_max_length": 1000000000000000019884624838656,
26
  "normalize": true,
27
- "pad_token": "a",
28
  "phonemize": false,
29
  "tokenizer_class": "VitsTokenizer",
30
  "unk_token": "<unk>",
 
2
  "add_blank": true,
3
  "added_tokens_decoder": {
4
  "0": {
5
+ "content": "0",
6
  "lstrip": false,
7
  "normalized": false,
8
  "rstrip": false,
9
  "single_word": false,
10
  "special": true
11
  },
12
+ "41": {
13
  "content": "<unk>",
14
  "lstrip": false,
15
  "normalized": false,
 
21
  "clean_up_tokenization_spaces": true,
22
  "extra_special_tokens": {},
23
  "is_uroman": false,
24
+ "language": "ceb",
25
  "model_max_length": 1000000000000000019884624838656,
26
  "normalize": true,
27
+ "pad_token": "0",
28
  "phonemize": false,
29
  "tokenizer_class": "VitsTokenizer",
30
  "unk_token": "<unk>",
vocab.json CHANGED
@@ -1,33 +1,43 @@
1
  {
2
- " ": 30,
3
- "'": 27,
4
- "-": 21,
5
- "a": 0,
6
- "b": 14,
7
- "c": 22,
8
- "d": 9,
9
- "e": 18,
10
- "f": 23,
11
- "g": 4,
12
- "h": 17,
13
- "i": 3,
14
- "j": 20,
15
- "k": 7,
16
- "l": 12,
17
- "m": 8,
18
- "n": 2,
19
- "o": 5,
20
- "p": 13,
21
- "q": 28,
22
- "r": 15,
23
- "s": 6,
24
- "t": 10,
25
- "u": 11,
26
- "v": 26,
27
- "w": 19,
28
- "x": 29,
29
- "y": 16,
30
- "z": 25,
31
- "|": 1,
32
- "": 24
 
 
 
 
 
 
 
 
 
 
33
  }
 
1
  {
2
+ " ": 27,
3
+ "'": 10,
4
+ "-": 9,
5
+ "0": 0,
6
+ "1": 23,
7
+ "2": 15,
8
+ "3": 35,
9
+ "4": 32,
10
+ "5": 18,
11
+ "6": 4,
12
+ "7": 37,
13
+ "8": 22,
14
+ "9": 11,
15
+ "_": 28,
16
+ "a": 24,
17
+ "b": 38,
18
+ "c": 8,
19
+ "d": 3,
20
+ "e": 16,
21
+ "f": 39,
22
+ "g": 6,
23
+ "h": 20,
24
+ "i": 33,
25
+ "j": 1,
26
+ "k": 34,
27
+ "l": 29,
28
+ "m": 31,
29
+ "n": 30,
30
+ "o": 36,
31
+ "p": 25,
32
+ "q": 17,
33
+ "r": 14,
34
+ "s": 5,
35
+ "t": 19,
36
+ "u": 12,
37
+ "v": 13,
38
+ "w": 7,
39
+ "x": 26,
40
+ "y": 40,
41
+ "z": 2,
42
+ "—": 21
43
  }