aqib-prog commited on
Commit
594f115
·
verified ·
1 Parent(s): aad6508

Upload tokenizer.json with huggingface_hub

Browse files
Files changed (1) hide show
  1. tokenizer.json +176 -0
tokenizer.json ADDED
@@ -0,0 +1,176 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "version": "1.0",
3
+ "truncation": null,
4
+ "padding": null,
5
+ "added_tokens": [],
6
+ "normalizer": null,
7
+ "pre_tokenizer": {
8
+ "type": "ByteLevel",
9
+ "add_prefix_space": false,
10
+ "trim_offsets": true,
11
+ "use_regex": true
12
+ },
13
+ "post_processor": null,
14
+ "decoder": {
15
+ "type": "ByteLevel",
16
+ "add_prefix_space": true,
17
+ "trim_offsets": true,
18
+ "use_regex": true
19
+ },
20
+ "model": {
21
+ "type": "WordLevel",
22
+ "vocab": {
23
+ "<unk>": 0,
24
+ "<|endoftext|>": 1,
25
+ "<|im_start|>": 2,
26
+ "<|im_end|>": 3,
27
+ "<|object_ref_start|>": 4,
28
+ "<|object_ref_end|>": 5,
29
+ "<|box_start|>": 6,
30
+ "<|box_end|>": 7,
31
+ "<|quad_start|>": 8,
32
+ "<|quad_end|>": 9,
33
+ "<|vision_start|>": 10,
34
+ "<|vision_end|>": 11,
35
+ "<|vision_pad|>": 12,
36
+ "<|image_pad|>": 13,
37
+ "<|video_pad|>": 14,
38
+ "<tool_call>": 15,
39
+ "</tool_call>": 16,
40
+ "<|fim_prefix|>": 17,
41
+ "<|fim_middle|>": 18,
42
+ "<|fim_suffix|>": 19,
43
+ "<|fim_pad|>": 20,
44
+ "<|repo_name|>": 21,
45
+ "<|file_sep|>": 22,
46
+ "<image>": 23,
47
+ "</image>": 24,
48
+ "<ref>": 25,
49
+ "</ref>": 26,
50
+ "<box>": 27,
51
+ "</box>": 28,
52
+ "<quad>": 29,
53
+ "</quad>": 30,
54
+ "<point>": 31,
55
+ "</point>": 32,
56
+ "<slice>": 33,
57
+ "</slice>": 34,
58
+ "<image_id>": 35,
59
+ "</image_id>": 36,
60
+ "<unit>": 37,
61
+ "</unit>": 38,
62
+ "<asr>": 39,
63
+ "</asr>": 40,
64
+ "<query>": 41,
65
+ "</query>": 42,
66
+ "<|audio_start|>": 43,
67
+ "<|audio|>": 44,
68
+ "<|audio_end|>": 45,
69
+ "<|spk_bos|>": 46,
70
+ "<|spk|>": 47,
71
+ "<|spk_eos|>": 48,
72
+ "<|tts_bos|>": 49,
73
+ "<|tts_eos|>": 50,
74
+ "<|listen|>": 51,
75
+ "<|speak|>": 52,
76
+ "<|interrupt|>": 53,
77
+ "<|vad_start|>": 54,
78
+ "<|vad_end|>": 55,
79
+ "<reserved_43>": 56,
80
+ "<reserved_53>": 57,
81
+ "!": 58,
82
+ "\"": 59,
83
+ "#": 60,
84
+ "$": 61,
85
+ "%": 62,
86
+ "&": 63,
87
+ "'": 64,
88
+ "(": 65,
89
+ ")": 66,
90
+ "*": 67,
91
+ "+": 68,
92
+ ",": 69,
93
+ "-": 70,
94
+ ".": 71,
95
+ "/": 72,
96
+ "0": 73,
97
+ "1": 74,
98
+ "2": 75,
99
+ "3": 76,
100
+ "4": 77,
101
+ "5": 78,
102
+ "6": 79,
103
+ "7": 80,
104
+ "8": 81,
105
+ "9": 82,
106
+ ":": 83,
107
+ ";": 84,
108
+ "<": 85,
109
+ "=": 86,
110
+ ">": 87,
111
+ "?": 88,
112
+ "@": 89,
113
+ "A": 90,
114
+ "B": 91,
115
+ "C": 92,
116
+ "D": 93,
117
+ "E": 94,
118
+ "F": 95,
119
+ "G": 96,
120
+ "H": 97,
121
+ "I": 98,
122
+ "J": 99,
123
+ "K": 100,
124
+ "L": 101,
125
+ "M": 102,
126
+ "N": 103,
127
+ "O": 104,
128
+ "P": 105,
129
+ "Q": 106,
130
+ "R": 107,
131
+ "S": 108,
132
+ "T": 109,
133
+ "U": 110,
134
+ "V": 111,
135
+ "W": 112,
136
+ "X": 113,
137
+ "Y": 114,
138
+ "Z": 115,
139
+ "[": 116,
140
+ "\\": 117,
141
+ "]": 118,
142
+ "^": 119,
143
+ "_": 120,
144
+ "`": 121,
145
+ "a": 122,
146
+ "b": 123,
147
+ "c": 124,
148
+ "d": 125,
149
+ "e": 126,
150
+ "f": 127,
151
+ "g": 128,
152
+ "h": 129,
153
+ "i": 130,
154
+ "j": 131,
155
+ "k": 132,
156
+ "l": 133,
157
+ "m": 134,
158
+ "n": 135,
159
+ "o": 136,
160
+ "p": 137,
161
+ "q": 138,
162
+ "r": 139,
163
+ "s": 140,
164
+ "t": 141,
165
+ "u": 142,
166
+ "v": 143,
167
+ "w": 144,
168
+ "x": 145,
169
+ "y": 146,
170
+ "z": 147,
171
+ "{": 148,
172
+ "|": 149
173
+ },
174
+ "unk_token": "<unk>"
175
+ }
176
+ }