hts98 commited on
Commit
e59e22a
·
1 Parent(s): b85ec9a

Upload tokenizer

Browse files
Files changed (1) hide show
  1. vocab.json +114 -114
vocab.json CHANGED
@@ -1,119 +1,119 @@
1
  {
2
- "%": 85,
3
- "&": 35,
4
- "'": 92,
5
- "*": 24,
6
- "+": 33,
7
- "/": 69,
8
- "0": 96,
9
- "1": 73,
10
- "2": 67,
11
- "3": 52,
12
- "4": 18,
13
- "5": 39,
14
- "6": 17,
15
- "7": 47,
16
- "8": 27,
17
- "9": 5,
18
- "<": 74,
19
- "=": 23,
20
- ">": 98,
21
- "[": 91,
22
  "[PAD]": 116,
23
  "[UNK]": 115,
24
- "]": 112,
25
- "a": 11,
26
- "b": 42,
27
- "c": 104,
28
- "d": 53,
29
- "e": 115,
30
- "f": 15,
31
- "g": 107,
32
- "h": 51,
33
- "i": 7,
34
- "j": 45,
35
- "k": 32,
36
- "l": 109,
37
- "m": 61,
38
- "n": 31,
39
- "o": 13,
40
- "p": 3,
41
- "q": 88,
42
- "r": 110,
43
- "s": 30,
44
- "t": 54,
45
- "u": 79,
46
- "v": 46,
47
- "w": 44,
48
- "x": 81,
49
- "y": 99,
50
- "z": 83,
51
- "|": 102,
52
- "à": 93,
53
- "á": 103,
54
- "â": 62,
55
- "ã": 64,
56
- "è": 105,
57
- "é": 19,
58
- "ê": 60,
59
- "ì": 50,
60
- "í": 38,
61
- "ò": 12,
62
- "ó": 56,
63
- "ô": 66,
64
- "õ": 9,
65
- "ù": 59,
66
- "ú": 55,
67
- "ý": 22,
68
- "ă": 86,
69
- "đ": 49,
70
- "ĩ": 14,
71
- "ũ": 48,
72
- "ơ": 2,
73
- "ư": 71,
74
- "ạ": 82,
75
- "ả": 28,
76
- "ấ": 57,
77
- "ầ": 4,
78
- "ẩ": 26,
79
- "ẫ": 113,
80
- "ậ": 100,
81
- "ắ": 80,
82
- "ằ": 8,
83
- "ẳ": 68,
84
- "ẵ": 0,
85
- "ặ": 29,
86
- "ẹ": 41,
87
- "ẻ": 77,
88
- "ẽ": 20,
89
- "ế": 70,
90
- "ề": 90,
91
- "ể": 114,
92
- "ễ": 84,
93
- "ệ": 16,
94
- "ỉ": 58,
95
- "ị": 1,
96
- "ọ": 89,
97
- "ỏ": 101,
98
- "ố": 87,
99
- "ồ": 78,
100
- "ổ": 95,
101
- "ỗ": 106,
102
- "ộ": 75,
103
- "ớ": 34,
104
- "ờ": 63,
105
- "ở": 21,
106
- "ỡ": 76,
107
  "ợ": 25,
108
- "ụ": 65,
109
- "ủ": 97,
110
- "ứ": 37,
111
- "ừ": 94,
112
- "ử": 72,
113
- "ữ": 111,
114
- "ự": 10,
115
- "ỳ": 43,
116
- "ỷ": 6,
117
- "ỹ": 36,
118
- "₫": 40
119
  }
 
1
  {
2
+ "%": 36,
3
+ "&": 96,
4
+ "'": 3,
5
+ "*": 108,
6
+ "+": 13,
7
+ "/": 99,
8
+ "0": 2,
9
+ "1": 5,
10
+ "2": 113,
11
+ "3": 73,
12
+ "4": 28,
13
+ "5": 60,
14
+ "6": 110,
15
+ "7": 46,
16
+ "8": 39,
17
+ "9": 76,
18
+ "<": 90,
19
+ "=": 59,
20
+ ">": 91,
21
+ "[": 17,
22
  "[PAD]": 116,
23
  "[UNK]": 115,
24
+ "]": 47,
25
+ "a": 70,
26
+ "b": 49,
27
+ "c": 83,
28
+ "d": 31,
29
+ "e": 0,
30
+ "f": 32,
31
+ "g": 38,
32
+ "h": 98,
33
+ "i": 1,
34
+ "j": 94,
35
+ "k": 82,
36
+ "l": 53,
37
+ "m": 100,
38
+ "n": 101,
39
+ "o": 111,
40
+ "p": 72,
41
+ "q": 52,
42
+ "r": 27,
43
+ "s": 62,
44
+ "t": 24,
45
+ "u": 80,
46
+ "v": 93,
47
+ "w": 58,
48
+ "x": 87,
49
+ "y": 11,
50
+ "z": 67,
51
+ "|": 106,
52
+ "à": 68,
53
+ "á": 22,
54
+ "â": 54,
55
+ "ã": 65,
56
+ "è": 89,
57
+ "é": 79,
58
+ "ê": 16,
59
+ "ì": 81,
60
+ "í": 35,
61
+ "ò": 40,
62
+ "ó": 63,
63
+ "ô": 4,
64
+ "õ": 14,
65
+ "ù": 74,
66
+ "ú": 34,
67
+ "ý": 97,
68
+ "ă": 71,
69
+ "đ": 12,
70
+ "ĩ": 20,
71
+ "ũ": 77,
72
+ "ơ": 30,
73
+ "ư": 57,
74
+ "ạ": 33,
75
+ "ả": 104,
76
+ "ấ": 41,
77
+ "ầ": 107,
78
+ "ẩ": 10,
79
+ "ẫ": 56,
80
+ "ậ": 103,
81
+ "ắ": 78,
82
+ "ằ": 105,
83
+ "ẳ": 112,
84
+ "ẵ": 48,
85
+ "ặ": 42,
86
+ "ẹ": 43,
87
+ "ẻ": 21,
88
+ "ẽ": 92,
89
+ "ế": 64,
90
+ "ề": 95,
91
+ "ể": 50,
92
+ "ễ": 69,
93
+ "ệ": 51,
94
+ "ỉ": 109,
95
+ "ị": 8,
96
+ "ọ": 19,
97
+ "ỏ": 6,
98
+ "ố": 86,
99
+ "ồ": 84,
100
+ "ổ": 114,
101
+ "ỗ": 75,
102
+ "ộ": 18,
103
+ "ớ": 85,
104
+ "ờ": 26,
105
+ "ở": 45,
106
+ "ỡ": 61,
107
  "ợ": 25,
108
+ "ụ": 37,
109
+ "ủ": 15,
110
+ "ứ": 66,
111
+ "ừ": 23,
112
+ "ử": 55,
113
+ "ữ": 44,
114
+ "ự": 7,
115
+ "ỳ": 9,
116
+ "ỷ": 88,
117
+ "ỹ": 29,
118
+ "₫": 102
119
  }