Frinkles committed on
Commit
0fb34c1
·
verified ·
1 Parent(s): 89c89ad

Upload tokenizer

Browse files
Files changed (2) hide show
  1. tokenizer.json +0 -0
  2. tokenizer_config.json +11 -19
tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json CHANGED
@@ -20,14 +20,6 @@
20
  "special": true
21
  },
22
  "2": {
23
- "content": "</s>",
24
- "lstrip": false,
25
- "normalized": false,
26
- "rstrip": true,
27
- "single_word": false,
28
- "special": false
29
- },
30
- "32000": {
31
  "content": "<|endoftext|>",
32
  "lstrip": false,
33
  "normalized": false,
@@ -35,7 +27,7 @@
35
  "single_word": false,
36
  "special": true
37
  },
38
- "32001": {
39
  "content": "<|assistant|>",
40
  "lstrip": false,
41
  "normalized": false,
@@ -43,7 +35,7 @@
43
  "single_word": false,
44
  "special": true
45
  },
46
- "32002": {
47
  "content": "<|placeholder1|>",
48
  "lstrip": false,
49
  "normalized": false,
@@ -51,7 +43,7 @@
51
  "single_word": false,
52
  "special": true
53
  },
54
- "32003": {
55
  "content": "<|placeholder2|>",
56
  "lstrip": false,
57
  "normalized": false,
@@ -59,7 +51,7 @@
59
  "single_word": false,
60
  "special": true
61
  },
62
- "32004": {
63
  "content": "<|placeholder3|>",
64
  "lstrip": false,
65
  "normalized": false,
@@ -67,7 +59,7 @@
67
  "single_word": false,
68
  "special": true
69
  },
70
- "32005": {
71
  "content": "<|placeholder4|>",
72
  "lstrip": false,
73
  "normalized": false,
@@ -75,7 +67,7 @@
75
  "single_word": false,
76
  "special": true
77
  },
78
- "32006": {
79
  "content": "<|system|>",
80
  "lstrip": false,
81
  "normalized": false,
@@ -83,7 +75,7 @@
83
  "single_word": false,
84
  "special": true
85
  },
86
- "32007": {
87
  "content": "<|end|>",
88
  "lstrip": false,
89
  "normalized": false,
@@ -91,7 +83,7 @@
91
  "single_word": false,
92
  "special": true
93
  },
94
- "32008": {
95
  "content": "<|placeholder5|>",
96
  "lstrip": false,
97
  "normalized": false,
@@ -99,7 +91,7 @@
99
  "single_word": false,
100
  "special": true
101
  },
102
- "32009": {
103
  "content": "<|placeholder6|>",
104
  "lstrip": false,
105
  "normalized": false,
@@ -107,7 +99,7 @@
107
  "single_word": false,
108
  "special": true
109
  },
110
- "32010": {
111
  "content": "<|user|>",
112
  "lstrip": false,
113
  "normalized": false,
@@ -121,7 +113,7 @@
121
  "clean_up_tokenization_spaces": false,
122
  "eos_token": "<|endoftext|>",
123
  "legacy": false,
124
- "model_max_length": 4096,
125
  "pad_token": "<|endoftext|>",
126
  "padding_side": "left",
127
  "sp_model_kwargs": {},
 
20
  "special": true
21
  },
22
  "2": {
 
 
 
 
 
 
 
 
23
  "content": "<|endoftext|>",
24
  "lstrip": false,
25
  "normalized": false,
 
27
  "single_word": false,
28
  "special": true
29
  },
30
+ "3": {
31
  "content": "<|assistant|>",
32
  "lstrip": false,
33
  "normalized": false,
 
35
  "single_word": false,
36
  "special": true
37
  },
38
+ "4": {
39
  "content": "<|placeholder1|>",
40
  "lstrip": false,
41
  "normalized": false,
 
43
  "single_word": false,
44
  "special": true
45
  },
46
+ "5": {
47
  "content": "<|placeholder2|>",
48
  "lstrip": false,
49
  "normalized": false,
 
51
  "single_word": false,
52
  "special": true
53
  },
54
+ "6": {
55
  "content": "<|placeholder3|>",
56
  "lstrip": false,
57
  "normalized": false,
 
59
  "single_word": false,
60
  "special": true
61
  },
62
+ "7": {
63
  "content": "<|placeholder4|>",
64
  "lstrip": false,
65
  "normalized": false,
 
67
  "single_word": false,
68
  "special": true
69
  },
70
+ "8": {
71
  "content": "<|system|>",
72
  "lstrip": false,
73
  "normalized": false,
 
75
  "single_word": false,
76
  "special": true
77
  },
78
+ "9": {
79
  "content": "<|end|>",
80
  "lstrip": false,
81
  "normalized": false,
 
83
  "single_word": false,
84
  "special": true
85
  },
86
+ "10": {
87
  "content": "<|placeholder5|>",
88
  "lstrip": false,
89
  "normalized": false,
 
91
  "single_word": false,
92
  "special": true
93
  },
94
+ "11": {
95
  "content": "<|placeholder6|>",
96
  "lstrip": false,
97
  "normalized": false,
 
99
  "single_word": false,
100
  "special": true
101
  },
102
+ "12": {
103
  "content": "<|user|>",
104
  "lstrip": false,
105
  "normalized": false,
 
113
  "clean_up_tokenization_spaces": false,
114
  "eos_token": "<|endoftext|>",
115
  "legacy": false,
116
+ "model_max_length": 131072,
117
  "pad_token": "<|endoftext|>",
118
  "padding_side": "left",
119
  "sp_model_kwargs": {},