hts98 commited on
Commit
b85ec9a
·
1 Parent(s): c2ba7da

Upload tokenizer

Browse files
Files changed (1) hide show
  1. vocab.json +112 -112
vocab.json CHANGED
@@ -1,119 +1,119 @@
1
  {
2
- "%": 110,
3
- "&": 62,
4
- "'": 49,
5
- "*": 27,
6
- "+": 19,
7
- "/": 81,
8
- "0": 45,
9
- "1": 14,
10
- "2": 11,
11
- "3": 108,
12
- "4": 0,
13
- "5": 72,
14
- "6": 87,
15
- "7": 105,
16
- "8": 56,
17
- "9": 96,
18
  "<": 74,
19
  "=": 23,
20
  ">": 98,
21
- "[": 70,
22
  "[PAD]": 116,
23
  "[UNK]": 115,
24
- "]": 20,
25
- "a": 51,
26
- "b": 88,
27
- "c": 16,
28
- "d": 97,
29
- "e": 78,
30
- "f": 13,
31
- "g": 82,
32
- "h": 101,
33
- "i": 68,
34
- "j": 36,
35
- "k": 65,
36
- "l": 107,
37
- "m": 31,
38
- "n": 115,
39
- "o": 3,
40
- "p": 86,
41
- "q": 37,
42
- "r": 73,
43
- "s": 113,
44
- "t": 28,
45
- "u": 6,
46
- "v": 21,
47
- "w": 61,
48
- "x": 17,
49
- "y": 89,
50
- "z": 48,
51
- "|": 35,
52
- "à": 8,
53
- "á": 83,
54
- "â": 12,
55
- "ã": 95,
56
- "è": 22,
57
- "é": 94,
58
- "ê": 2,
59
- "ì": 77,
60
- "í": 30,
61
- "ò": 112,
62
- "ó": 10,
63
- "ô": 93,
64
- "õ": 32,
65
- "ù": 41,
66
- "ú": 24,
67
- "ý": 76,
68
- "ă": 38,
69
- "đ": 99,
70
- "ĩ": 103,
71
- "ũ": 34,
72
- "ơ": 47,
73
- "ư": 114,
74
- "ạ": 7,
75
- "ả": 69,
76
- "ấ": 55,
77
- "ầ": 52,
78
- "ẩ": 109,
79
- "ẫ": 80,
80
- "ậ": 84,
81
- "ắ": 57,
82
- "ằ": 111,
83
- "ẳ": 50,
84
- "ẵ": 90,
85
- "ặ": 79,
86
- "ẹ": 58,
87
- "ẻ": 106,
88
- "ẽ": 75,
89
- "ế": 5,
90
- "ề": 64,
91
- "ể": 15,
92
- "ễ": 33,
93
- "ệ": 4,
94
- "ỉ": 60,
95
- "ị": 26,
96
- "ọ": 46,
97
- "ỏ": 43,
98
- "ố": 91,
99
- "ồ": 85,
100
- "ổ": 92,
101
- "ỗ": 63,
102
- "ộ": 42,
103
- "ớ": 102,
104
- "ờ": 71,
105
- "ở": 54,
106
- "ỡ": 18,
107
- "ợ": 1,
108
- "ụ": 25,
109
- "ủ": 39,
110
- "ứ": 53,
111
- "ừ": 104,
112
- "ử": 100,
113
- "ữ": 67,
114
- "ự": 40,
115
- "ỳ": 44,
116
- "ỷ": 29,
117
- "ỹ": 59,
118
- "₫": 9
119
  }
 
1
  {
2
+ "%": 85,
3
+ "&": 35,
4
+ "'": 92,
5
+ "*": 24,
6
+ "+": 33,
7
+ "/": 69,
8
+ "0": 96,
9
+ "1": 73,
10
+ "2": 67,
11
+ "3": 52,
12
+ "4": 18,
13
+ "5": 39,
14
+ "6": 17,
15
+ "7": 47,
16
+ "8": 27,
17
+ "9": 5,
18
  "<": 74,
19
  "=": 23,
20
  ">": 98,
21
+ "[": 91,
22
  "[PAD]": 116,
23
  "[UNK]": 115,
24
+ "]": 112,
25
+ "a": 11,
26
+ "b": 42,
27
+ "c": 104,
28
+ "d": 53,
29
+ "e": 115,
30
+ "f": 15,
31
+ "g": 107,
32
+ "h": 51,
33
+ "i": 7,
34
+ "j": 45,
35
+ "k": 32,
36
+ "l": 109,
37
+ "m": 61,
38
+ "n": 31,
39
+ "o": 13,
40
+ "p": 3,
41
+ "q": 88,
42
+ "r": 110,
43
+ "s": 30,
44
+ "t": 54,
45
+ "u": 79,
46
+ "v": 46,
47
+ "w": 44,
48
+ "x": 81,
49
+ "y": 99,
50
+ "z": 83,
51
+ "|": 102,
52
+ "à": 93,
53
+ "á": 103,
54
+ "â": 62,
55
+ "ã": 64,
56
+ "è": 105,
57
+ "é": 19,
58
+ "ê": 60,
59
+ "ì": 50,
60
+ "í": 38,
61
+ "ò": 12,
62
+ "ó": 56,
63
+ "ô": 66,
64
+ "õ": 9,
65
+ "ù": 59,
66
+ "ú": 55,
67
+ "ý": 22,
68
+ "ă": 86,
69
+ "đ": 49,
70
+ "ĩ": 14,
71
+ "ũ": 48,
72
+ "ơ": 2,
73
+ "ư": 71,
74
+ "ạ": 82,
75
+ "ả": 28,
76
+ "ấ": 57,
77
+ "ầ": 4,
78
+ "ẩ": 26,
79
+ "ẫ": 113,
80
+ "ậ": 100,
81
+ "ắ": 80,
82
+ "ằ": 8,
83
+ "ẳ": 68,
84
+ "ẵ": 0,
85
+ "ặ": 29,
86
+ "ẹ": 41,
87
+ "ẻ": 77,
88
+ "ẽ": 20,
89
+ "ế": 70,
90
+ "ề": 90,
91
+ "ể": 114,
92
+ "ễ": 84,
93
+ "ệ": 16,
94
+ "ỉ": 58,
95
+ "ị": 1,
96
+ "ọ": 89,
97
+ "ỏ": 101,
98
+ "ố": 87,
99
+ "ồ": 78,
100
+ "ổ": 95,
101
+ "ỗ": 106,
102
+ "ộ": 75,
103
+ "ớ": 34,
104
+ "ờ": 63,
105
+ "ở": 21,
106
+ "ỡ": 76,
107
+ "ợ": 25,
108
+ "ụ": 65,
109
+ "ủ": 97,
110
+ "ứ": 37,
111
+ "ừ": 94,
112
+ "ử": 72,
113
+ "ữ": 111,
114
+ "ự": 10,
115
+ "ỳ": 43,
116
+ "ỷ": 6,
117
+ "ỹ": 36,
118
+ "₫": 40
119
  }