shawn0wang commited on
Commit
0d73d3b
·
verified ·
1 Parent(s): 6226093

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +58 -15
README.md CHANGED
@@ -247,26 +247,69 @@ The model follows a connection pattern of Vision Encoder → MLP Adapter → Lan
247
  ---
248
 
249
 
250
- ## 5. Quick Start
251
 
252
- This section describes how to quickly install, configure, and run the Skywork-R1V model.
 
 
 
 
253
 
254
- **Example Steps:**
255
 
256
- 1. **Clone the GitHub repository**
257
- ```bash
258
- git clone https://github.com/your-repo
259
- ```
260
 
261
- 2. **Install dependencies**
262
- ```bash
263
- cd your-repo
264
- pip install -r requirements.txt
265
- ```
266
 
267
- 3. **Run the example code**
268
- ```bash
269
- python demo.py
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
270
  ```
271
 
272
  ---
 
247
  ---
248
 
249
 
250
+ ## 5. Usage
251
 
252
+ ```python
253
+ from transformers import AutoTokenizer, AutoConfig, AutoModel, CLIPImageProcessor
254
+ from utils_ import split_model, load_image
255
+ import sys, os
256
+ import torch
257
 
 
258
 
259
+ path = 'Skywork/Skywork-R1V-38B'
260
+ image_path = "/path/to/image"
 
 
261
 
 
 
 
 
 
262
 
263
+ device_map, visible_devices = split_model(path)
264
+ tokenizer = AutoTokenizer.from_pretrained(path, trust_remote_code=True, use_fast=False)
265
+ model = AutoModel.from_pretrained(
266
+ path,
267
+ torch_dtype=torch.bfloat16,
268
+ load_in_8bit=False,
269
+ low_cpu_mem_usage=True,
270
+ use_flash_attn=True,
271
+ trust_remote_code=True,
272
+ device_map=device_map).eval()
273
+
274
+ generation_config = dict(max_new_tokens=64000, do_sample=True, temperature=0.6, top_p=0.95, repetition_penalty=1.05)
275
+ pixel_values = load_image(image_path, max_num=12).to(torch.bfloat16).cuda()
276
+
277
+ # pure-text conversation (纯文本对话)
278
+ question = 'If all cats can fly, and Tom is a cat, can Tom fly?'
279
+ response = model.chat(tokenizer, None, question, generation_config, history=None)
280
+ print(f'User: {question}\nAssistant: {response}')
281
+
282
+ # single-image single-round conversation (单图单轮对话)
283
+ question = '<image>\nSelect the correct option from this question.'
284
+ response = model.chat(tokenizer, pixel_values, question, generation_config)
285
+ print(f'User: {question}\nAssistant: {response}')
286
+
287
+ # single-image multi-round conversation (单图多轮对话)
288
+ question = '<image>\nSelect the correct option from this question.'
289
+ response, history = model.chat(tokenizer, pixel_values, question, generation_config, history=None, return_history=True)
290
+ print(f'User: {question}\nAssistant: {response}')
291
+
292
+ question = 'What if the height in the question is changed to 0.5?'
293
+ response, history = model.chat(tokenizer, pixel_values, question, generation_config, history=history, return_history=True)
294
+ print(f'User: {question}\nAssistant: {response}')
295
+
296
+ # multi-image multi-round conversation, separate images (多图多轮对话,独立图像)
297
+ pixel_values1 = load_image('./examples/image1.jpg', max_num=12).to(torch.bfloat16).cuda()
298
+ pixel_values2 = load_image('./examples/image2.jpg', max_num=12).to(torch.bfloat16).cuda()
299
+ pixel_values = torch.cat((pixel_values1, pixel_values2), dim=0)
300
+ num_patches_list = [pixel_values1.size(0), pixel_values2.size(0)]
301
+
302
+ question = '<image>\n<image>\nSelect the correct option from this question.'
303
+ response, history = model.chat(tokenizer, pixel_values, question, generation_config,
304
+ num_patches_list=num_patches_list,
305
+ history=None, return_history=True)
306
+ print(f'User: {question}\nAssistant: {response}')
307
+
308
+ question = 'What if the height in the question is changed to 0.5?'
309
+ response, history = model.chat(tokenizer, pixel_values, question, generation_config,
310
+ num_patches_list=num_patches_list,
311
+ history=history, return_history=True)
312
+ print(f'User: {question}\nAssistant: {response}')
313
  ```
314
 
315
  ---