hyojin99 committed on
Commit
5f24c7b
·
verified ·
1 Parent(s): e12ba32

Upload tokenizer

Browse files
Files changed (3) hide show
  1. special_tokens_map.json +6 -0
  2. tokenizer_config.json +12 -0
  3. vocab.json +75 -0
special_tokens_map.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "<s>",
3
+ "eos_token": "</s>",
4
+ "pad_token": "[PAD]",
5
+ "unk_token": "[UNK]"
6
+ }
tokenizer_config.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "<s>",
3
+ "do_lower_case": false,
4
+ "eos_token": "</s>",
5
+ "max_length": 512,
6
+ "pad_token": "[PAD]",
7
+ "padding": "max_length",
8
+ "replace_word_delimiter_char": " ",
9
+ "tokenizer_class": "Wav2Vec2CTCTokenizer",
10
+ "unk_token": "[UNK]",
11
+ "word_delimiter_token": "|"
12
+ }
vocab.json ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "0": 48,
3
+ "1": 49,
4
+ "2": 55,
5
+ "3": 15,
6
+ "4": 35,
7
+ "8": 66,
8
+ "[PAD]": 72,
9
+ "[UNK]": 71,
10
+ "a": 24,
11
+ "c": 67,
12
+ "d": 26,
13
+ "e": 20,
14
+ "f": 41,
15
+ "i": 19,
16
+ "j": 62,
17
+ "m": 0,
18
+ "n": 6,
19
+ "o": 56,
20
+ "p": 23,
21
+ "r": 5,
22
+ "s": 40,
23
+ "t": 47,
24
+ "u": 39,
25
+ "v": 44,
26
+ "|": 21,
27
+ "ㄱ": 11,
28
+ "ㄲ": 43,
29
+ "ㄴ": 8,
30
+ "ㄵ": 45,
31
+ "ㄶ": 38,
32
+ "ㄷ": 63,
33
+ "ㄸ": 65,
34
+ "ㄹ": 29,
35
+ "ㄺ": 69,
36
+ "ㄻ": 25,
37
+ "ㄼ": 70,
38
+ "ㄾ": 52,
39
+ "ㅀ": 59,
40
+ "ㅁ": 16,
41
+ "ㅂ": 4,
42
+ "ㅃ": 61,
43
+ "ㅄ": 36,
44
+ "ㅅ": 10,
45
+ "ㅆ": 7,
46
+ "ㅇ": 64,
47
+ "ㅈ": 68,
48
+ "ㅉ": 2,
49
+ "ㅊ": 34,
50
+ "ㅋ": 3,
51
+ "ㅌ": 37,
52
+ "ㅍ": 30,
53
+ "ㅎ": 12,
54
+ "ㅏ": 13,
55
+ "ㅐ": 42,
56
+ "ㅑ": 22,
57
+ "ㅒ": 54,
58
+ "ㅓ": 17,
59
+ "ㅔ": 57,
60
+ "ㅕ": 1,
61
+ "ㅖ": 50,
62
+ "ㅗ": 51,
63
+ "ㅘ": 14,
64
+ "ㅙ": 46,
65
+ "ㅚ": 27,
66
+ "ㅛ": 32,
67
+ "ㅜ": 31,
68
+ "ㅝ": 9,
69
+ "ㅞ": 18,
70
+ "ㅟ": 28,
71
+ "ㅠ": 58,
72
+ "ㅡ": 33,
73
+ "ㅢ": 53,
74
+ "ㅣ": 60
75
+ }