jree423 committed on
Commit
dd065be
·
verified ·
1 Parent(s): d05c4c9

Upload tokenizer.json with huggingface_hub

Browse files
Files changed (1) hide show
  1. tokenizer.json +32 -11
tokenizer.json CHANGED
@@ -3,19 +3,40 @@
3
  "truncation": null,
4
  "padding": null,
5
  "added_tokens": [],
6
- "normalizer": null,
7
- "pre_tokenizer": null,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
  "post_processor": null,
9
  "decoder": null,
10
  "model": {
11
- "type": "unigram",
12
- "vocab": [
13
- {
14
- "id": 0,
15
- "piece": "<unk>",
16
- "score": 0.0
17
- }
18
- ],
19
- "unk_id": 0
 
 
 
 
 
 
20
  }
21
  }
 
3
  "truncation": null,
4
  "padding": null,
5
  "added_tokens": [],
6
+ "normalizer": {
7
+ "type": "Sequence",
8
+ "normalizers": [
9
+ {
10
+ "type": "NFD"
11
+ },
12
+ {
13
+ "type": "Lowercase"
14
+ },
15
+ {
16
+ "type": "StripAccents"
17
+ }
18
+ ]
19
+ },
20
+ "pre_tokenizer": {
21
+ "type": "Whitespace"
22
+ },
23
  "post_processor": null,
24
  "decoder": null,
25
  "model": {
26
+ "type": "WordPiece",
27
+ "vocab": {
28
+ "[PAD]": 0,
29
+ "[UNK]": 1,
30
+ "a": 2,
31
+ "the": 3,
32
+ "cat": 4,
33
+ "dog": 5,
34
+ "house": 6,
35
+ "mountain": 7,
36
+ "landscape": 8,
37
+ "beautiful": 9,
38
+ "sunset": 10
39
+ },
40
+ "unk_token": "[UNK]"
41
  }
42
  }