linekeita committed on
Commit
005fc14
·
verified ·
1 Parent(s): 37343ea

Upload tokenizer

Browse files
Files changed (1) hide show
  1. vocab.json +81 -81
vocab.json CHANGED
@@ -1,86 +1,86 @@
1
  {
2
- "&": 46,
3
- "'": 26,
4
- "(": 43,
5
- ")": 79,
6
- "/": 4,
7
- "=": 3,
8
  "[PAD]": 83,
9
  "[UNK]": 82,
10
- "a": 20,
11
- "b": 55,
12
- "c": 67,
13
- "d": 60,
14
- "e": 57,
15
- "f": 27,
16
- "g": 74,
17
- "h": 14,
18
- "i": 69,
19
- "j": 49,
20
- "k": 11,
21
- "l": 47,
22
- "m": 77,
23
- "n": 58,
24
- "o": 2,
25
- "p": 81,
26
- "q": 76,
27
- "r": 36,
28
- "s": 0,
29
- "t": 50,
30
- "u": 40,
31
- "v": 16,
32
- "w": 59,
33
- "x": 63,
34
- "y": 18,
35
- "z": 64,
36
- "|": 39,
37
- "«": 23,
38
- "»": 28,
39
- "à": 33,
40
- "á": 35,
41
- "â": 71,
42
- "å": 73,
43
- "ç": 45,
44
- "è": 10,
45
- "é": 70,
46
- "ê": 34,
47
- "ë": 13,
48
- "í": 56,
49
- "î": 31,
50
- "ï": 68,
51
- "ñ": 48,
52
- "ó": 30,
53
- "ô": 25,
54
- "ö": 37,
55
- "ù": 80,
56
- "ú": 62,
57
- "û": 5,
58
- "ü": 52,
59
- "ā": 44,
60
- "ă": 51,
61
- "č": 24,
62
- "ı": 29,
63
- "ł": 61,
64
- "ň": 53,
65
- "ō": 22,
66
- "œ": 7,
67
- "ş": 8,
68
- "š": 15,
69
- "ū": 12,
70
- "ș": 41,
71
- "ʻ": 42,
72
- "ʼ": 66,
73
- "̀": 19,
74
- "́": 1,
75
- "̂": 38,
76
- "̧": 6,
77
- "α": 21,
78
- "ễ": 72,
79
- "–": 32,
80
  "—": 9,
81
- "’": 54,
82
- "“": 17,
83
- "”": 78,
84
- "…": 75,
85
- "€": 65
86
  }
 
1
  {
2
+ "&": 6,
3
+ "'": 63,
4
+ "(": 40,
5
+ ")": 26,
6
+ "/": 78,
7
+ "=": 32,
8
  "[PAD]": 83,
9
  "[UNK]": 82,
10
+ "a": 36,
11
+ "b": 11,
12
+ "c": 77,
13
+ "d": 55,
14
+ "e": 66,
15
+ "f": 57,
16
+ "g": 50,
17
+ "h": 51,
18
+ "i": 76,
19
+ "j": 18,
20
+ "k": 75,
21
+ "l": 31,
22
+ "m": 15,
23
+ "n": 47,
24
+ "o": 5,
25
+ "p": 12,
26
+ "q": 54,
27
+ "r": 59,
28
+ "s": 35,
29
+ "t": 60,
30
+ "u": 45,
31
+ "v": 52,
32
+ "w": 21,
33
+ "x": 70,
34
+ "y": 74,
35
+ "z": 43,
36
+ "|": 56,
37
+ "«": 7,
38
+ "»": 39,
39
+ "à": 62,
40
+ "á": 10,
41
+ "â": 58,
42
+ "å": 22,
43
+ "ç": 69,
44
+ "è": 28,
45
+ "é": 24,
46
+ "ê": 64,
47
+ "ë": 72,
48
+ "í": 81,
49
+ "î": 46,
50
+ "ï": 16,
51
+ "ñ": 19,
52
+ "ó": 17,
53
+ "ô": 20,
54
+ "ö": 4,
55
+ "ù": 53,
56
+ "ú": 80,
57
+ "û": 37,
58
+ "ü": 23,
59
+ "ā": 27,
60
+ "ă": 71,
61
+ "č": 73,
62
+ "ı": 2,
63
+ "ł": 34,
64
+ "ň": 49,
65
+ "ō": 68,
66
+ "œ": 3,
67
+ "ş": 48,
68
+ "š": 79,
69
+ "ū": 44,
70
+ "ș": 61,
71
+ "ʻ": 30,
72
+ "ʼ": 65,
73
+ "̀": 8,
74
+ "́": 67,
75
+ "̂": 0,
76
+ "̧": 41,
77
+ "α": 42,
78
+ "ễ": 25,
79
+ "–": 13,
80
  "—": 9,
81
+ "’": 38,
82
+ "“": 1,
83
+ "”": 29,
84
+ "…": 33,
85
+ "€": 14
86
  }