minpeter committed
Commit db77928 · verified · 1 Parent(s): 025bbdb

diff for compatibility
README.md CHANGED
@@ -11,138 +11,23 @@ library_name: transformers
 license: apache-2.0
 ---
 
-## Citation
-arxiv.org/abs/2408.07246
-
-```
-@inproceedings{li2025chemvlm,
-  title={Chemvlm: Exploring the power of multimodal large language models in chemistry area},
-  author={Li, Junxian and Zhang, Di and Wang, Xunzhi and Hao, Zeying and Lei, Jingdi and Tan, Qian and Zhou, Cai and Liu, Wei and Yang, Yaotian and Xiong, Xinrui and others},
-  booktitle={Proceedings of the AAAI Conference on Artificial Intelligence},
-  volume={39},
-  number={1},
-  pages={415--423},
-  year={2025}
-}
-```
-
-Codebase and datasets can be found at https://github.com/AI4Chem/ChemVlm.
-
-Quick start as below (`transformers>=4.37.0` is needed).
-Update: you may also need
-```
-pip install sentencepiece
-pip install einops
-pip install timm
-pip install "accelerate>=0.26.0"
-```
-
-Code:
-
-```python
-from PIL import Image
-from transformers import AutoTokenizer, AutoModelForCausalLM
-import torch
-import torchvision.transforms as T
-from torchvision.transforms.functional import InterpolationMode
-
-IMAGENET_MEAN = (0.485, 0.456, 0.406)
-IMAGENET_STD = (0.229, 0.224, 0.225)
-
-
-def build_transform(input_size):
-    # Standard ImageNet normalization, applied to every square tile.
-    MEAN, STD = IMAGENET_MEAN, IMAGENET_STD
-    transform = T.Compose([
-        T.Lambda(lambda img: img.convert('RGB') if img.mode != 'RGB' else img),
-        T.Resize((input_size, input_size), interpolation=InterpolationMode.BICUBIC),
-        T.ToTensor(),
-        T.Normalize(mean=MEAN, std=STD)
-    ])
-    return transform
-
-
-def find_closest_aspect_ratio(aspect_ratio, target_ratios, width, height, image_size):
-    best_ratio_diff = float('inf')
-    best_ratio = (1, 1)
-    area = width * height
-    for ratio in target_ratios:
-        target_aspect_ratio = ratio[0] / ratio[1]
-        ratio_diff = abs(aspect_ratio - target_aspect_ratio)
-        if ratio_diff < best_ratio_diff:
-            best_ratio_diff = ratio_diff
-            best_ratio = ratio
-        elif ratio_diff == best_ratio_diff:
-            if area > 0.5 * image_size * image_size * ratio[0] * ratio[1]:
-                best_ratio = ratio
-    return best_ratio
-
-
-def dynamic_preprocess(image, min_num=1, max_num=6, image_size=448, use_thumbnail=False):
-    orig_width, orig_height = image.size
-    aspect_ratio = orig_width / orig_height
-
-    # enumerate the candidate tile grids
-    target_ratios = set(
-        (i, j) for n in range(min_num, max_num + 1) for i in range(1, n + 1) for j in range(1, n + 1)
-        if min_num <= i * j <= max_num)
-    target_ratios = sorted(target_ratios, key=lambda x: x[0] * x[1])
-
-    # find the closest aspect ratio to the target
-    target_aspect_ratio = find_closest_aspect_ratio(
-        aspect_ratio, target_ratios, orig_width, orig_height, image_size)
-
-    # calculate the target width and height
-    target_width = image_size * target_aspect_ratio[0]
-    target_height = image_size * target_aspect_ratio[1]
-    blocks = target_aspect_ratio[0] * target_aspect_ratio[1]
-
-    # resize, then split the image into image_size x image_size tiles
-    resized_img = image.resize((target_width, target_height))
-    processed_images = []
-    for i in range(blocks):
-        box = (
-            (i % (target_width // image_size)) * image_size,
-            (i // (target_width // image_size)) * image_size,
-            ((i % (target_width // image_size)) + 1) * image_size,
-            ((i // (target_width // image_size)) + 1) * image_size
-        )
-        split_img = resized_img.crop(box)
-        processed_images.append(split_img)
-    assert len(processed_images) == blocks
-    if use_thumbnail and len(processed_images) != 1:
-        thumbnail_img = image.resize((image_size, image_size))
-        processed_images.append(thumbnail_img)
-    return processed_images
-
-
-def load_image(image_file, input_size=448, max_num=6):
-    image = Image.open(image_file).convert('RGB')
-    transform = build_transform(input_size=input_size)
-    images = dynamic_preprocess(image, image_size=input_size, use_thumbnail=True, max_num=max_num)
-    pixel_values = torch.stack([transform(img) for img in images])
-    return pixel_values
-
-
-tokenizer = AutoTokenizer.from_pretrained('AI4Chem/ChemVLM-26B-1-2', trust_remote_code=True)
-
-query = "Please describe the molecule in the image."
-image_path = "your image path"
-pixel_values = load_image(image_path, max_num=6).to(torch.bfloat16).cuda()
-
-model = AutoModelForCausalLM.from_pretrained(
-    "AI4Chem/ChemVLM-26B-1-2",
-    torch_dtype=torch.bfloat16,
-    low_cpu_mem_usage=True,
-    trust_remote_code=True
-).eval().cuda()
-
-gen_kwargs = {"max_length": 1000, "do_sample": True, "temperature": 0.7, "top_p": 0.9}
-
-response = model.chat(tokenizer, pixel_values, query, gen_kwargs)
-```
+<!-- header start -->
+<p align="center">
+  <img src="https://huggingface.co/datasets/FriendliAI/documentation-images/resolve/main/model-card-assets/friendliai.png" width="100%" alt="FriendliAI Logo">
+</p>
+<!-- header end -->
+
+# AI4Chem/ChemVLM-26B-1-2
+
+* Model creator: [AI4Chem](https://huggingface.co/AI4Chem)
+* Original model: [ChemVLM-26B-1-2](https://huggingface.co/AI4Chem/ChemVLM-26B-1-2)
+
+## Differences
+
+* Added the missing eos_token (`<|im_end|>`, id 92542) to config.json, generation_config.json, and tokenizer_config.json
+* Moved the chat template out of tokenizer_config.json into a standalone chat_template.jinja
+
+## License
+
+Refer to the license of the original model card.
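The practical effect of the eos fix can be checked without downloading any weights. A minimal sketch, assuming a `transformers` version recent enough to pick up the standalone `chat_template.jinja`, and that the repo id matches the original model (substitute this mirror's id if it differs):

```python
from transformers import AutoTokenizer

repo = "AI4Chem/ChemVLM-26B-1-2"  # assumed repo id; adjust for this mirror
tokenizer = AutoTokenizer.from_pretrained(repo, trust_remote_code=True)

# Render one chat turn. After this commit the configured eos token and the
# template's turn terminator agree, so generation stops at the end of a reply.
messages = [{"role": "user", "content": "Hi"}]
prompt = tokenizer.apply_chat_template(messages, tokenize=False)
assert prompt.rstrip().endswith(tokenizer.eos_token)  # eos is now <|im_end|>
```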
chat_template.jinja ADDED
@@ -0,0 +1,7 @@
+{{- bos_token -}}
+{%- for message in messages -%}
+{{- "<|im_start|>" + message["role"] + "\n" + message["content"] + "<|im_end|>" + "\n" -}}
+{%- endfor -%}
+{%- if add_generation_prompt -%}
+{{- "<|im_start|>assistant\n" -}}
+{%- endif -%}
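For reference, this is roughly what the template renders for a single user turn (a sketch, assuming the tokenizer from this repo and `apply_chat_template` support in `transformers`):

```python
from transformers import AutoTokenizer

# Assumed repo id; the template wraps each turn in <|im_start|>/<|im_end|>.
tokenizer = AutoTokenizer.from_pretrained("AI4Chem/ChemVLM-26B-1-2", trust_remote_code=True)
messages = [{"role": "user", "content": "Please describe the molecule in the image."}]
print(tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True))
# <s><|im_start|>user
# Please describe the molecule in the image.<|im_end|>
# <|im_start|>assistant
```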
config.json CHANGED
@@ -35,7 +35,10 @@
     "do_sample": false,
     "early_stopping": false,
     "encoder_no_repeat_ngram_size": 0,
-    "eos_token_id": 2,
+    "eos_token_id": [
+      2,
+      92542
+    ],
     "exponential_decay_length_penalty": null,
     "finetuning_task": null,
     "forced_bos_token_id": null,
@@ -197,4 +200,4 @@
     "use_bfloat16": true,
     "use_flash_attn": true
   }
-}
+}
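Since this `eos_token_id` sits inside a nested sub-config, a raw-JSON check is the least fragile way to confirm the change. A sketch using `huggingface_hub` (repo id assumed, no weights downloaded):

```python
import json
from huggingface_hub import hf_hub_download

# Fetch only config.json from the hub.
path = hf_hub_download(repo_id="AI4Chem/ChemVLM-26B-1-2", filename="config.json")
with open(path) as f:
    config = json.load(f)


def find_eos(node):
    """Collect every eos_token_id value anywhere in the nested config."""
    found = []
    if isinstance(node, dict):
        for key, value in node.items():
            if key == "eos_token_id":
                found.append(value)
            found.extend(find_eos(value))
    elif isinstance(node, list):
        for item in node:
            found.extend(find_eos(item))
    return found


print(find_eos(config))  # expected after this commit: includes [2, 92542]
```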
generation_config.json CHANGED
@@ -1,4 +1,10 @@
 {
   "_from_model_config": true,
+  "bos_token_id": 1,
+  "eos_token_id": [
+    2,
+    92542
+  ],
+  "pad_token_id": 2,
   "transformers_version": "4.44.2"
-}
+}
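`GenerationConfig.from_pretrained` reads exactly this file, so the new defaults can be verified directly (sketch; repo id assumed):

```python
from transformers import GenerationConfig

gen_config = GenerationConfig.from_pretrained("AI4Chem/ChemVLM-26B-1-2")
print(gen_config.bos_token_id)  # expected: 1
print(gen_config.eos_token_id)  # expected: [2, 92542]
print(gen_config.pad_token_id)  # expected: 2
```

With both ids listed, `generate()` stops at either the legacy `</s>` (2) or the chat terminator `<|im_end|>` (92542) without callers having to pass `eos_token_id` by hand.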
tokenizer_config.json CHANGED
@@ -163,11 +163,10 @@
     ]
   },
   "bos_token": "<s>",
-  "chat_template": "{{ bos_token }}{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
   "clean_up_tokenization_spaces": false,
-  "eos_token": "</s>",
+  "eos_token": "<|im_end|>",
   "model_max_length": 2048,
   "pad_token": "</s>",
   "tokenizer_class": "InternLM2Tokenizer",
   "unk_token": "<unk>"
-}
+}
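On the tokenizer side (sketch; repo id assumed), the eos string now maps to the `<|im_end|>` id while padding is unchanged:

```python
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("AI4Chem/ChemVLM-26B-1-2", trust_remote_code=True)
print(tokenizer.eos_token)                                   # expected: <|im_end|>
print(tokenizer.convert_tokens_to_ids(tokenizer.eos_token))  # expected: 92542
print(tokenizer.pad_token)                                   # still </s>
```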