albertvillanova HF Staff committed on
Commit
a76046c
·
verified ·
1 Parent(s): 764849c

Upload processor

Browse files
Files changed (3) hide show
  1. processor_config.json +27 -0
  2. tokenizer.json +7 -17
  3. tokenizer_config.json +5 -49
processor_config.json CHANGED
@@ -1,4 +1,31 @@
1
  {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  "image_token": "<image>",
3
  "num_additional_image_tokens": 1,
4
  "patch_size": 14,
 
1
  {
2
+ "image_processor": {
3
+ "crop_size": {
4
+ "height": 336,
5
+ "width": 336
6
+ },
7
+ "do_center_crop": true,
8
+ "do_convert_rgb": true,
9
+ "do_normalize": true,
10
+ "do_rescale": true,
11
+ "do_resize": true,
12
+ "image_mean": [
13
+ 0.48145466,
14
+ 0.4578275,
15
+ 0.40821073
16
+ ],
17
+ "image_processor_type": "CLIPImageProcessor",
18
+ "image_std": [
19
+ 0.26862954,
20
+ 0.26130258,
21
+ 0.27577711
22
+ ],
23
+ "resample": 3,
24
+ "rescale_factor": 0.00392156862745098,
25
+ "size": {
26
+ "shortest_edge": 336
27
+ }
28
+ },
29
  "image_token": "<image>",
30
  "num_additional_image_tokens": 1,
31
  "patch_size": 14,
tokenizer.json CHANGED
@@ -49,23 +49,13 @@
49
  "special": true
50
  }
51
  ],
52
- "normalizer": {
53
- "type": "Sequence",
54
- "normalizers": [
55
- {
56
- "type": "Prepend",
57
- "prepend": "▁"
58
- },
59
- {
60
- "type": "Replace",
61
- "pattern": {
62
- "String": " "
63
- },
64
- "content": "▁"
65
- }
66
- ]
67
  },
68
- "pre_tokenizer": null,
69
  "post_processor": {
70
  "type": "TemplateProcessing",
71
  "single": [
@@ -147,7 +137,7 @@
147
  "model": {
148
  "type": "BPE",
149
  "dropout": null,
150
- "unk_token": "<unk>",
151
  "continuing_subword_prefix": null,
152
  "end_of_word_suffix": null,
153
  "fuse_unk": true,
 
49
  "special": true
50
  }
51
  ],
52
+ "normalizer": null,
53
+ "pre_tokenizer": {
54
+ "type": "Metaspace",
55
+ "replacement": "▁",
56
+ "prepend_scheme": "first",
57
+ "split": false
 
 
 
 
 
 
 
 
 
58
  },
 
59
  "post_processor": {
60
  "type": "TemplateProcessing",
61
  "single": [
 
137
  "model": {
138
  "type": "BPE",
139
  "dropout": null,
140
+ "unk_token": null,
141
  "continuing_subword_prefix": null,
142
  "end_of_word_suffix": null,
143
  "fuse_unk": true,
tokenizer_config.json CHANGED
@@ -1,64 +1,20 @@
1
  {
2
- "add_bos_token": true,
3
- "add_eos_token": false,
4
  "add_prefix_space": null,
5
- "added_tokens_decoder": {
6
- "0": {
7
- "content": "<unk>",
8
- "lstrip": false,
9
- "normalized": false,
10
- "rstrip": false,
11
- "single_word": false,
12
- "special": true
13
- },
14
- "1": {
15
- "content": "<s>",
16
- "lstrip": false,
17
- "normalized": false,
18
- "rstrip": false,
19
- "single_word": false,
20
- "special": true
21
- },
22
- "2": {
23
- "content": "</s>",
24
- "lstrip": false,
25
- "normalized": false,
26
- "rstrip": false,
27
- "single_word": false,
28
- "special": true
29
- },
30
- "32000": {
31
- "content": "<image>",
32
- "lstrip": false,
33
- "normalized": false,
34
- "rstrip": false,
35
- "single_word": false,
36
- "special": true
37
- },
38
- "32001": {
39
- "content": "<pad>",
40
- "lstrip": false,
41
- "normalized": false,
42
- "rstrip": false,
43
- "single_word": false,
44
- "special": true
45
- }
46
- },
47
  "bos_token": "<s>",
48
  "clean_up_tokenization_spaces": false,
49
  "eos_token": "</s>",
50
- "extra_special_tokens": {
51
- "image_token": "<image>"
52
- },
53
  "image_token": "<image>",
54
- "legacy": false,
55
  "model_max_length": 1000000000000000019884624838656,
 
 
 
56
  "pad_token": "<pad>",
57
  "padding_side": "left",
58
  "processor_class": "LlavaProcessor",
59
  "sp_model_kwargs": {},
60
  "tokenizer_class": "LlamaTokenizer",
61
- "trust_remote_code": false,
62
  "unk_token": "<unk>",
63
  "use_default_system_prompt": false
64
  }
 
1
  {
 
 
2
  "add_prefix_space": null,
3
+ "backend": "tokenizers",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
  "bos_token": "<s>",
5
  "clean_up_tokenization_spaces": false,
6
  "eos_token": "</s>",
 
 
 
7
  "image_token": "<image>",
8
+ "is_local": false,
9
  "model_max_length": 1000000000000000019884624838656,
10
+ "model_specific_special_tokens": {
11
+ "image_token": "<image>"
12
+ },
13
  "pad_token": "<pad>",
14
  "padding_side": "left",
15
  "processor_class": "LlavaProcessor",
16
  "sp_model_kwargs": {},
17
  "tokenizer_class": "LlamaTokenizer",
 
18
  "unk_token": "<unk>",
19
  "use_default_system_prompt": false
20
  }