trungdang2901 committed on
Commit
f1342a6
·
verified ·
1 Parent(s): 84b19be

Upload tokenizer

Browse files
Files changed (3) hide show
  1. added_tokens.json +4 -1
  2. tokenizer.json +29 -2
  3. tokenizer_config.json +26 -2
added_tokens.json CHANGED
@@ -1,4 +1,7 @@
1
  {
2
  "<mask>": 20001,
3
- "<unk>": 20000
 
 
 
4
  }
 
1
  {
2
  "<mask>": 20001,
3
+ "<unk>": 20000,
4
+ "<vi>": 20002,
5
+ "cv": 20004,
6
+ "zh": 20003
7
  }
tokenizer.json CHANGED
@@ -43,10 +43,37 @@
43
  "id": 20001,
44
  "content": "<mask>",
45
  "single_word": false,
46
- "lstrip": true,
47
  "rstrip": false,
48
  "normalized": true,
49
- "special": true
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
  }
51
  ],
52
  "normalizer": null,
 
43
  "id": 20001,
44
  "content": "<mask>",
45
  "single_word": false,
46
+ "lstrip": false,
47
  "rstrip": false,
48
  "normalized": true,
49
+ "special": false
50
+ },
51
+ {
52
+ "id": 20002,
53
+ "content": "<vi>",
54
+ "single_word": false,
55
+ "lstrip": false,
56
+ "rstrip": false,
57
+ "normalized": true,
58
+ "special": false
59
+ },
60
+ {
61
+ "id": 20003,
62
+ "content": "zh",
63
+ "single_word": false,
64
+ "lstrip": false,
65
+ "rstrip": false,
66
+ "normalized": true,
67
+ "special": false
68
+ },
69
+ {
70
+ "id": 20004,
71
+ "content": "cv",
72
+ "single_word": false,
73
+ "lstrip": false,
74
+ "rstrip": false,
75
+ "normalized": true,
76
+ "special": false
77
  }
78
  ],
79
  "normalizer": null,
tokenizer_config.json CHANGED
@@ -35,11 +35,35 @@
35
  },
36
  "20001": {
37
  "content": "<mask>",
38
- "lstrip": true,
39
  "normalized": true,
40
  "rstrip": false,
41
  "single_word": false,
42
- "special": true
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
  }
44
  },
45
  "bos_token": "<s>",
 
35
  },
36
  "20001": {
37
  "content": "<mask>",
38
+ "lstrip": false,
39
  "normalized": true,
40
  "rstrip": false,
41
  "single_word": false,
42
+ "special": false
43
+ },
44
+ "20002": {
45
+ "content": "<vi>",
46
+ "lstrip": false,
47
+ "normalized": true,
48
+ "rstrip": false,
49
+ "single_word": false,
50
+ "special": false
51
+ },
52
+ "20003": {
53
+ "content": "zh",
54
+ "lstrip": false,
55
+ "normalized": true,
56
+ "rstrip": false,
57
+ "single_word": false,
58
+ "special": false
59
+ },
60
+ "20004": {
61
+ "content": "cv",
62
+ "lstrip": false,
63
+ "normalized": true,
64
+ "rstrip": false,
65
+ "single_word": false,
66
+ "special": false
67
  }
68
  },
69
  "bos_token": "<s>",