styalai committed on
Commit
5359f86
·
verified ·
1 Parent(s): 8a4e23b

Upload tokenizer

Browse files
Files changed (5) hide show
  1. added_tokens.json +5 -2
  2. merges.txt +4 -0
  3. tokenizer.json +0 -0
  4. tokenizer_config.json +24 -16
  5. vocab.json +0 -0
added_tokens.json CHANGED
@@ -4,6 +4,9 @@
4
  "<pad>": 8003,
5
  "<s>": 8000,
6
  "<unk>": 8002,
7
- "<|assistant|>": 8006,
8
- "<|endoftext|>": 8005
 
 
 
9
  }
 
4
  "<pad>": 8003,
5
  "<s>": 8000,
6
  "<unk>": 8002,
7
+ "<|assistant|>": 8008,
8
+ "<|endoftext|>": 8005,
9
+ "<|end|>": 8007,
10
+ "<|system|>": 8009,
11
+ "<|user|>": 8006
12
  }
merges.txt CHANGED
@@ -7739,3 +7739,7 @@ bal anced
7739
  Ġoverwhel med
7740
  Ġlic ense
7741
  Ġflood ing
 
 
 
 
 
7739
  Ġoverwhel med
7740
  Ġlic ense
7741
  Ġflood ing
7742
+ Ġcrow d
7743
+ Ġtow ns
7744
+ ĠEle phant
7745
+ Ġwond ers
tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json CHANGED
@@ -1,22 +1,6 @@
1
  {
2
  "add_prefix_space": false,
3
  "added_tokens_decoder": {
4
- "2": {
5
- "content": "<|end|>",
6
- "lstrip": false,
7
- "normalized": false,
8
- "rstrip": false,
9
- "single_word": false,
10
- "special": true
11
- },
12
- "3": {
13
- "content": "<|user|>",
14
- "lstrip": false,
15
- "normalized": false,
16
- "rstrip": false,
17
- "single_word": false,
18
- "special": true
19
- },
20
  "8000": {
21
  "content": "<s>",
22
  "lstrip": false,
@@ -66,12 +50,36 @@
66
  "special": true
67
  },
68
  "8006": {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
69
  "content": "<|assistant|>",
70
  "lstrip": false,
71
  "normalized": true,
72
  "rstrip": false,
73
  "single_word": false,
74
  "special": false
 
 
 
 
 
 
 
 
75
  }
76
  },
77
  "bos_token": "<|user|>",
 
1
  {
2
  "add_prefix_space": false,
3
  "added_tokens_decoder": {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
  "8000": {
5
  "content": "<s>",
6
  "lstrip": false,
 
50
  "special": true
51
  },
52
  "8006": {
53
+ "content": "<|user|>",
54
+ "lstrip": false,
55
+ "normalized": false,
56
+ "rstrip": false,
57
+ "single_word": false,
58
+ "special": true
59
+ },
60
+ "8007": {
61
+ "content": "<|end|>",
62
+ "lstrip": false,
63
+ "normalized": false,
64
+ "rstrip": false,
65
+ "single_word": false,
66
+ "special": true
67
+ },
68
+ "8008": {
69
  "content": "<|assistant|>",
70
  "lstrip": false,
71
  "normalized": true,
72
  "rstrip": false,
73
  "single_word": false,
74
  "special": false
75
+ },
76
+ "8009": {
77
+ "content": "<|system|>",
78
+ "lstrip": false,
79
+ "normalized": true,
80
+ "rstrip": false,
81
+ "single_word": false,
82
+ "special": false
83
  }
84
  },
85
  "bos_token": "<|user|>",
vocab.json CHANGED
The diff for this file is too large to render. See raw diff