AnthonyDi committed on
Commit
6bf8f29
·
verified ·
1 Parent(s): 2b50fbe

Upload folder using huggingface_hub

Browse files
Files changed (3) hide show
  1. special_tokens_map.json +6 -0
  2. tokenizer_config.json +11 -0
  3. vocab.json +147 -0
special_tokens_map.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "<s>",
3
+ "eos_token": "</s>",
4
+ "pad_token": "<pad>",
5
+ "unk_token": "<unk>"
6
+ }
tokenizer_config.json ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "tokenizer_class": "CharacterTokenizer",
3
+ "auto_map": {
4
+ "AutoTokenizer": "tokenizer.CharacterTokenizer"
5
+ },
6
+ "bos_token": "<s>",
7
+ "eos_token": "</s>",
8
+ "unk_token": "<unk>",
9
+ "pad_token": "<pad>",
10
+ "vocab_file": "vocab.json"
11
+ }
vocab.json ADDED
@@ -0,0 +1,147 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "<pad>": 0,
3
+ "<unk>": 1,
4
+ "<s>": 2,
5
+ "</s>": 3,
6
+ " ": 4,
7
+ "!": 5,
8
+ "\"": 6,
9
+ "#": 7,
10
+ "$": 8,
11
+ "%": 9,
12
+ "&": 10,
13
+ "'": 11,
14
+ "(": 12,
15
+ ")": 13,
16
+ "*": 14,
17
+ "+": 15,
18
+ ",": 16,
19
+ "-": 17,
20
+ ".": 18,
21
+ "/": 19,
22
+ "0": 20,
23
+ "1": 21,
24
+ "2": 22,
25
+ "3": 23,
26
+ "4": 24,
27
+ "5": 25,
28
+ "6": 26,
29
+ "7": 27,
30
+ "8": 28,
31
+ "9": 29,
32
+ ":": 30,
33
+ ";": 31,
34
+ "<": 32,
35
+ "=": 33,
36
+ ">": 34,
37
+ "?": 35,
38
+ "@": 36,
39
+ "A": 37,
40
+ "B": 38,
41
+ "C": 39,
42
+ "D": 40,
43
+ "E": 41,
44
+ "F": 42,
45
+ "G": 43,
46
+ "H": 44,
47
+ "I": 45,
48
+ "J": 46,
49
+ "K": 47,
50
+ "L": 48,
51
+ "M": 49,
52
+ "N": 50,
53
+ "O": 51,
54
+ "P": 52,
55
+ "Q": 53,
56
+ "R": 54,
57
+ "S": 55,
58
+ "T": 56,
59
+ "U": 57,
60
+ "V": 58,
61
+ "W": 59,
62
+ "X": 60,
63
+ "Y": 61,
64
+ "Z": 62,
65
+ "[": 63,
66
+ "\\": 64,
67
+ "]": 65,
68
+ "^": 66,
69
+ "_": 67,
70
+ "`": 68,
71
+ "a": 69,
72
+ "b": 70,
73
+ "c": 71,
74
+ "d": 72,
75
+ "e": 73,
76
+ "f": 74,
77
+ "g": 75,
78
+ "h": 76,
79
+ "i": 77,
80
+ "j": 78,
81
+ "k": 79,
82
+ "l": 80,
83
+ "m": 81,
84
+ "n": 82,
85
+ "o": 83,
86
+ "p": 84,
87
+ "q": 85,
88
+ "r": 86,
89
+ "s": 87,
90
+ "t": 88,
91
+ "u": 89,
92
+ "v": 90,
93
+ "w": 91,
94
+ "x": 92,
95
+ "y": 93,
96
+ "z": 94,
97
+ "{": 95,
98
+ "|": 96,
99
+ "}": 97,
100
+ "~": 98,
101
+ "£": 99,
102
+ "©": 100,
103
+ "®": 101,
104
+ "°": 102,
105
+ "à": 103,
106
+ "á": 104,
107
+ "ä": 105,
108
+ "å": 106,
109
+ "ç": 107,
110
+ "è": 108,
111
+ "é": 109,
112
+ "ê": 110,
113
+ "ë": 111,
114
+ "î": 112,
115
+ "ï": 113,
116
+ "ñ": 114,
117
+ "ó": 115,
118
+ "ô": 116,
119
+ "ö": 117,
120
+ "ø": 118,
121
+ "û": 119,
122
+ "ü": 120,
123
+ "ƍ": 121,
124
+ "̄": 122,
125
+ "Δ": 123,
126
+ "α": 124,
127
+ "β": 125,
128
+ "γ": 126,
129
+ "η": 127,
130
+ "θ": 128,
131
+ "κ": 129,
132
+ "λ": 130,
133
+ "μ": 131,
134
+ "ν": 132,
135
+ "ξ": 133,
136
+ "π": 134,
137
+ "σ": 135,
138
+ "τ": 136,
139
+ "φ": 137,
140
+ "ψ": 138,
141
+ "ω": 139,
142
+ "•": 140,
143
+ "′": 141,
144
+ "€": 142,
145
+ "☐": 143,
146
+ "☒": 144
147
+ }