jtatman committed on
Commit
2858638
·
1 Parent(s): 9ef8f0a

Upload tokenizer

Browse files
Files changed (3) hide show
  1. special_tokens_map.json +28 -2
  2. tokenizer.json +27 -0
  3. tokenizer_config.json +28 -2
special_tokens_map.json CHANGED
@@ -1,7 +1,33 @@
1
  {
2
  "additional_special_tokens": [
3
- "<|ASSISTANT|>",
4
- "<|USER|>"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  ],
6
  "bos_token": {
7
  "content": "<|bos|>",
 
1
  {
2
  "additional_special_tokens": [
3
+ {
4
+ "content": "<|user|>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false
9
+ },
10
+ {
11
+ "content": "<|assistant|>",
12
+ "lstrip": false,
13
+ "normalized": false,
14
+ "rstrip": false,
15
+ "single_word": false
16
+ },
17
+ {
18
+ "content": "<|system|>",
19
+ "lstrip": false,
20
+ "normalized": false,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ },
24
+ {
25
+ "content": "<|endoftext|>",
26
+ "lstrip": false,
27
+ "normalized": false,
28
+ "rstrip": false,
29
+ "single_word": false
30
+ }
31
  ],
32
  "bos_token": {
33
  "content": "<|bos|>",
tokenizer.json CHANGED
@@ -79,6 +79,33 @@
79
  "rstrip": false,
80
  "normalized": false,
81
  "special": true
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
82
  }
83
  ],
84
  "normalizer": {
 
79
  "rstrip": false,
80
  "normalized": false,
81
  "special": true
82
+ },
83
+ {
84
+ "id": 32005,
85
+ "content": "<|user|>",
86
+ "single_word": false,
87
+ "lstrip": false,
88
+ "rstrip": false,
89
+ "normalized": false,
90
+ "special": true
91
+ },
92
+ {
93
+ "id": 32006,
94
+ "content": "<|assistant|>",
95
+ "single_word": false,
96
+ "lstrip": false,
97
+ "rstrip": false,
98
+ "normalized": false,
99
+ "special": true
100
+ },
101
+ {
102
+ "id": 32007,
103
+ "content": "<|system|>",
104
+ "single_word": false,
105
+ "lstrip": false,
106
+ "rstrip": false,
107
+ "normalized": false,
108
+ "special": true
109
  }
110
  ],
111
  "normalizer": {
tokenizer_config.json CHANGED
@@ -65,11 +65,37 @@
65
  "rstrip": false,
66
  "single_word": false,
67
  "special": true
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
68
  }
69
  },
70
  "additional_special_tokens": [
71
- "<|ASSISTANT|>",
72
- "<|USER|>"
 
 
73
  ],
74
  "bos_token": "<|bos|>",
75
  "clean_up_tokenization_spaces": false,
 
65
  "rstrip": false,
66
  "single_word": false,
67
  "special": true
68
+ },
69
+ "32005": {
70
+ "content": "<|user|>",
71
+ "lstrip": false,
72
+ "normalized": false,
73
+ "rstrip": false,
74
+ "single_word": false,
75
+ "special": true
76
+ },
77
+ "32006": {
78
+ "content": "<|assistant|>",
79
+ "lstrip": false,
80
+ "normalized": false,
81
+ "rstrip": false,
82
+ "single_word": false,
83
+ "special": true
84
+ },
85
+ "32007": {
86
+ "content": "<|system|>",
87
+ "lstrip": false,
88
+ "normalized": false,
89
+ "rstrip": false,
90
+ "single_word": false,
91
+ "special": true
92
  }
93
  },
94
  "additional_special_tokens": [
95
+ "<|user|>",
96
+ "<|assistant|>",
97
+ "<|system|>",
98
+ "<|endoftext|>"
99
  ],
100
  "bos_token": "<|bos|>",
101
  "clean_up_tokenization_spaces": false,