jiangbop commited on
Commit
97301f8
·
verified ·
1 Parent(s): d6fdcc7

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +1 -73
README.md CHANGED
@@ -55,8 +55,6 @@ We provide example code to run `SkyworkVL-2B` using `transformers`
55
 
56
  ### Model Loading
57
 
58
- #### 16-bit(bf16 / fp16)
59
-
60
  ```python
61
  import torch
62
  from transformers import AutoTokenizer, AutoModel
@@ -69,21 +67,6 @@ model = AutoModel.from_pretrained(
69
  trust_remote_code=True).eval().cuda()
70
  ```
71
 
72
- #### BNB 8-bit Quantization
73
-
74
- ```python
75
- import torch
76
- from transformers import AutoTokenizer, AutoModel
77
- path = "Skywork/SkyworkVL-2B"
78
- model = AutoModel.from_pretrained(
79
- path,
80
- torch_dtype=torch.bfloat16,
81
- load_in_8bit=True,
82
- low_cpu_mem_usage=True,
83
- use_flash_attn=True,
84
- trust_remote_code=True).eval()
85
-
86
- ```
87
 
88
  ### Inference with Transformers
89
 
@@ -221,66 +204,11 @@ question = 'Can you explain quantum mechanics to me?'
221
  response, history = model.chat(tokenizer, None, question, generation_config, history=history, return_history=True)
222
  print(f'User: {question}\nAssistant: {response}')
223
 
224
- # single-image single-round conversation (单张图片单轮对话)
225
  question = '<image>\nWhat do you see in this image?'
226
  response = model.chat(tokenizer, pixel_values, question, generation_config)
227
  print(f'User: {question}\nAssistant: {response}')
228
 
229
- # single-image multi-round conversation (单张图片多轮对话)
230
- question = '<image>\nCan you provide a detailed description of the image?'
231
- response, history = model.chat(tokenizer, pixel_values, question, generation_config, history=None, return_history=True)
232
- print(f'User: {question}\nAssistant: {response}')
233
-
234
- question = 'Based on the image, can you create a short story?'
235
- response, history = model.chat(tokenizer, pixel_values, question, generation_config, history=history, return_history=True)
236
- print(f'User: {question}\nAssistant: {response}')
237
-
238
- # multi-image multi-round conversation, combined images (多张图片多轮对话, 拼接图片)
239
- pixel_values1 = load_image('./demo/image1.jpg', max_num=12).to(torch.bfloat16).cuda()
240
- pixel_values2 = load_image('./demo/image2.jpg', max_num=12).to(torch.bfloat16).cuda()
241
- pixel_values = torch.cat((pixel_values1, pixel_values2), dim=0)
242
-
243
- question = '<image>\nDescribe the two images in detail.'
244
- response, history = model.chat(tokenizer, pixel_values, question, generation_config,
245
- history=None, return_history=True)
246
- print(f'User: {question}\nAssistant: {response}')
247
-
248
- question = 'What are the main differences between these two images?'
249
- response, history = model.chat(tokenizer, pixel_values, question, generation_config,
250
- history=history, return_history=True)
251
- print(f'User: {question}\nAssistant: {response}')
252
-
253
- # multi-image multi-round conversation, separate images (多张图片多轮对话, 分割图片)
254
- pixel_values1 = load_image('./demo/image1.jpg', max_num=12).to(torch.bfloat16).cuda()
255
- pixel_values2 = load_image('./demo/image2.jpg', max_num=12).to(torch.bfloat16).cuda()
256
- pixel_values = torch.cat((pixel_values1, pixel_values2), dim=0)
257
- num_patches_list = [pixel_values1.size(0), pixel_values2.size(0)]
258
-
259
- question = 'Image-1: <image>\nImage-2: <image>\nDescribe the two images in detail.'
260
- response, history = model.chat(tokenizer, pixel_values, question, generation_config,
261
- num_patches_list=num_patches_list,
262
- history=None, return_history=True)
263
- print(f'User: {question}\nAssistant: {response}')
264
-
265
- question = 'What are the similarities between these two images?'
266
- response, history = model.chat(tokenizer, pixel_values, question, generation_config,
267
- num_patches_list=num_patches_list,
268
- history=history, return_history=True)
269
- print(f'User: {question}\nAssistant: {response}')
270
-
271
- # batch inference, single image per sample (批量推理, 每条数据一张图片)
272
- pixel_values1 = load_image('./demo/image1.jpg', max_num=12).to(torch.bfloat16).cuda()
273
- pixel_values2 = load_image('./demo/image2.jpg', max_num=12).to(torch.bfloat16).cuda()
274
- num_patches_list = [pixel_values1.size(0), pixel_values2.size(0)]
275
- pixel_values = torch.cat((pixel_values1, pixel_values2), dim=0)
276
-
277
- questions = ['<image>\nDescribe the image in detail.'] * len(num_patches_list)
278
- responses = model.batch_chat(tokenizer, pixel_values,
279
- num_patches_list=num_patches_list,
280
- questions=questions,
281
- generation_config=generation_config)
282
- for question, response in zip(questions, responses):
283
- print(f'User: {question}\nAssistant: {response}')
284
  ```
285
 
286
  ## Citation
 
55
 
56
  ### Model Loading
57
 
 
 
58
  ```python
59
  import torch
60
  from transformers import AutoTokenizer, AutoModel
 
67
  trust_remote_code=True).eval().cuda()
68
  ```
69
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
70
 
71
  ### Inference with Transformers
72
 
 
204
  response, history = model.chat(tokenizer, None, question, generation_config, history=history, return_history=True)
205
  print(f'User: {question}\nAssistant: {response}')
206
 
207
+ # image-text conversation (图文对话)
208
  question = '<image>\nWhat do you see in this image?'
209
  response = model.chat(tokenizer, pixel_values, question, generation_config)
210
  print(f'User: {question}\nAssistant: {response}')
211
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
212
  ```
213
 
214
  ## Citation