ashwinij2 committed on
Commit
7806b0c
·
verified ·
1 Parent(s): 1fc2b0a

Upload tokenizer.json with huggingface_hub

Browse files
Files changed (1) hide show
  1. tokenizer.json +94 -0
tokenizer.json ADDED
@@ -0,0 +1,94 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "version": "1.0",
3
+ "truncation": {
4
+ "max_length": 2048,
5
+ "direction": "Right",
6
+ "stride": 0
7
+ },
8
+ "padding": {
9
+ "strategy": "fixed_size",
10
+ "direction": "Right",
11
+ "pad_to_multiple_of": 8,
12
+ "pad_id": 2,
13
+ "pad_type_id": 0,
14
+ "pad_token": "</s>"
15
+ },
16
+ "added_tokens": [
17
+ {
18
+ "id": 0,
19
+ "special": true,
20
+ "content": "<unk>",
21
+ "single_word": false
22
+ },
23
+ {
24
+ "id": 1,
25
+ "special": true,
26
+ "content": "<s>",
27
+ "single_word": false
28
+ },
29
+ {
30
+ "id": 2,
31
+ "special": true,
32
+ "content": "</s>",
33
+ "single_word": false
34
+ }
35
+ ],
36
+ "normalizer": {
37
+ "type": "Sequence",
38
+ "normalizers": [
39
+ {
40
+ "type": "Precompiled",
41
+ "precompiled_charsmap": "",
42
+ "lstrip": false,
43
+ "rstrip": false
44
+ }
45
+ ]
46
+ },
47
+ "pre_tokenizer": {
48
+ "type": "ByteLevel",
49
+ "add_prefix_space": true,
50
+ "trim_offsets": true
51
+ },
52
+ "post_processor": {
53
+ "type": "TemplateProcessing",
54
+ "single": [
55
+ "$A",
56
+ "</s>"
57
+ ],
58
+ "pair": [
59
+ "$A",
60
+ "</s>",
61
+ "$B",
62
+ "</s>"
63
+ ],
64
+ "special_tokens": {
65
+ "</s>": {
66
+ "id": 2,
67
+ "ids": [
68
+ 2
69
+ ],
70
+ "tokens": [
71
+ "</s>"
72
+ ]
73
+ }
74
+ }
75
+ },
76
+ "decoder": {
77
+ "type": "ByteLevel",
78
+ "add_prefix_space": true,
79
+ "trim_offsets": true
80
+ },
81
+ "model": {
82
+ "type": "BPE",
83
+ "dropout": null,
84
+ "unk_token": "<unk>",
85
+ "continuing_subword_prefix": "",
86
+ "end_of_word_suffix": "",
87
+ "fuse_unk": false,
88
+ "vocab": {
89
+ "<unk>": 0,
90
+ "<s>": 1,
91
+ "</s>": 2
92
+ }
93
+ }
94
+ }