Sovego committed on
Commit
3384349
·
verified ·
1 Parent(s): d3cb173

Upload tokenizer

Browse files
Files changed (3) hide show
  1. special_tokens_map.json +9 -3
  2. tokenizer.json +1 -0
  3. tokenizer_config.json +21 -25
special_tokens_map.json CHANGED
@@ -9,15 +9,21 @@
9
  "eos_token": {
10
  "content": "<|endoftext|>",
11
  "lstrip": false,
12
- "normalized": true,
 
 
 
 
 
 
 
13
  "rstrip": false,
14
  "single_word": false
15
  },
16
- "pad_token": "<|endoftext|>",
17
  "unk_token": {
18
  "content": "<|endoftext|>",
19
  "lstrip": false,
20
- "normalized": true,
21
  "rstrip": false,
22
  "single_word": false
23
  }
 
9
  "eos_token": {
10
  "content": "<|endoftext|>",
11
  "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "<|endoftext|>",
18
+ "lstrip": false,
19
+ "normalized": false,
20
  "rstrip": false,
21
  "single_word": false
22
  },
 
23
  "unk_token": {
24
  "content": "<|endoftext|>",
25
  "lstrip": false,
26
+ "normalized": false,
27
  "rstrip": false,
28
  "single_word": false
29
  }
tokenizer.json CHANGED
@@ -86,6 +86,7 @@
86
  "end_of_word_suffix": "</w>",
87
  "fuse_unk": false,
88
  "byte_fallback": false,
 
89
  "vocab": {
90
  "!": 0,
91
  "\"": 1,
 
86
  "end_of_word_suffix": "</w>",
87
  "fuse_unk": false,
88
  "byte_fallback": false,
89
+ "ignore_merges": false,
90
  "vocab": {
91
  "!": 0,
92
  "\"": 1,
tokenizer_config.json CHANGED
@@ -1,34 +1,30 @@
1
  {
2
  "add_prefix_space": false,
3
- "bos_token": {
4
- "__type": "AddedToken",
5
- "content": "<|startoftext|>",
6
- "lstrip": false,
7
- "normalized": true,
8
- "rstrip": false,
9
- "single_word": false
 
 
 
 
 
 
 
 
 
 
10
  },
 
 
11
  "do_lower_case": true,
12
- "eos_token": {
13
- "__type": "AddedToken",
14
- "content": "<|endoftext|>",
15
- "lstrip": false,
16
- "normalized": true,
17
- "rstrip": false,
18
- "single_word": false
19
- },
20
  "errors": "replace",
21
  "model_max_length": 77,
22
- "name_or_path": "/home/forssh/workspace/clip-finetuned",
23
  "pad_token": "<|endoftext|>",
24
- "special_tokens_map_file": "/home/forssh/workspace/clip-finetuned/special_tokens_map.json",
25
  "tokenizer_class": "CLIPTokenizer",
26
- "unk_token": {
27
- "__type": "AddedToken",
28
- "content": "<|endoftext|>",
29
- "lstrip": false,
30
- "normalized": true,
31
- "rstrip": false,
32
- "single_word": false
33
- }
34
  }
 
1
  {
2
  "add_prefix_space": false,
3
+ "added_tokens_decoder": {
4
+ "49406": {
5
+ "content": "<|startoftext|>",
6
+ "lstrip": false,
7
+ "normalized": true,
8
+ "rstrip": false,
9
+ "single_word": false,
10
+ "special": true
11
+ },
12
+ "49407": {
13
+ "content": "<|endoftext|>",
14
+ "lstrip": false,
15
+ "normalized": false,
16
+ "rstrip": false,
17
+ "single_word": false,
18
+ "special": true
19
+ }
20
  },
21
+ "bos_token": "<|startoftext|>",
22
+ "clean_up_tokenization_spaces": true,
23
  "do_lower_case": true,
24
+ "eos_token": "<|endoftext|>",
 
 
 
 
 
 
 
25
  "errors": "replace",
26
  "model_max_length": 77,
 
27
  "pad_token": "<|endoftext|>",
 
28
  "tokenizer_class": "CLIPTokenizer",
29
+ "unk_token": "<|endoftext|>"
 
 
 
 
 
 
 
30
  }